from enum import auto, IntEnum from re import compile, Pattern from typing import Collection, Mapping, Sequence, Tuple class Tok(IntEnum): """ All possible tokens used in the grammar """ Newline = auto() Whitespace = auto() PredicateSection = auto() VariablesSection = auto() ConstantsSection = auto() FunctionsSection = auto() ClausesSection = auto() Negate = auto() OpenP = auto() CloseP = auto() Comma = auto() Identifier = auto() Eof = auto() def __repr__(self): return self._name_ LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [ (compile(r"\n"), Tok.Newline), (compile(r"[ \t]+"), Tok.Whitespace), (compile("Predicates:"), Tok.PredicateSection), (compile("Variables:"), Tok.VariablesSection), (compile("Constants:"), Tok.ConstantsSection), (compile("Functions:"), Tok.FunctionsSection), (compile("Clauses:"), Tok.ClausesSection), (compile("!"), Tok.Negate), (compile(r"\("), Tok.OpenP), (compile(r"\)"), Tok.CloseP), (compile(","), Tok.Comma), (compile(r"\w+"), Tok.Identifier), ] """ A mapping of regexs to the tokens the identify Tokens earlier on in the list should be regarded as higher priority, even if a match lower on the list also matches. All unicode strings should be matched by at least one token. """ class Variable(IntEnum): Start = auto() Idents = auto() Clauses = auto() Clauses_ = auto() Clause = auto() Clause_ = auto() Term = auto() Func = auto() CSTerms = auto() def __repr__(self) -> str: return f'<{self._name_}>' GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [ (Variable.Start, [ Tok.PredicateSection, Variable.Idents, Tok.Newline , Tok.VariablesSection, Variable.Idents, Tok.Newline , Tok.ConstantsSection, Variable.Idents, Tok.Newline , Tok.FunctionsSection, Variable.Idents, Tok.Newline , Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ), (Variable.Idents, [ Tok.Identifier, Variable.Idents ]), (Variable.Idents, [ ]), (Variable.Clauses, [ Tok.Newline, Variable.Clauses_ ]), (Variable.Clauses, [ ]), (Variable.Clauses_, [ Variable.Clause, Variable.Clauses ]), (Variable.Clauses_, [ ]), (Variable.Clause, [ Variable.Term, Variable.Clause_ ]), (Variable.Clause_, [ Variable.Clause ]), (Variable.Clause_, [ ]), (Variable.Term, [ Tok.Negate, Variable.Term ]), (Variable.Term, [ Tok.Identifier, Variable.Func ]), (Variable.Func, [ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]), (Variable.Func, [ ]), (Variable.CSTerms, [ Tok.Comma, Variable.Term, Variable.CSTerms ]), (Variable.CSTerms, [ ]), ] """ Implements the following grammar: Start := PredicateSection Newline VariablesSection Newline ConstantsSection Newline FunctionsSection Newline ClausesSection Eof Idents := Identifier := ε Clauses := Newline := ε Clauses' := := ε Clause := Clause' := := ε Term := Negate := Identifier Func? := OpenP CloseP := ε CSTerms := Comma := ε """