from enum import auto, IntEnum
from typing import Collection, Tuple
from re import compile, Pattern


class Tok(IntEnum):
    """
    The kinds of token used in the grammar.

    Members are numbered by ``auto()`` in declaration order, so the order
    below determines each member's integer value.
    """

    # Layout
    Newline = auto()
    Whitespace = auto()

    # Section headers
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()

    # Punctuation and operators
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()

    # Names
    Identifier = auto()

    # Has no pattern in LEX_TABLE; presumably produced by the lexer itself
    # once the input is exhausted -- confirm against the lexer.
    Eof = auto()

    def __repr__(self) -> str:
        # Show just the member name (e.g. ``Comma``) rather than the default
        # ``<Tok.Comma: 11>`` form.
        return self.name


LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(regex), token)
    for regex, token in (
        (r"\n", Tok.Newline),
        (r"[ \t]+", Tok.Whitespace),
        ("Predicates:", Tok.PredicateSection),
        ("Variables:", Tok.VariablesSection),
        ("Constants:", Tok.ConstantsSection),
        ("Functions:", Tok.FunctionsSection),
        ("Clauses:", Tok.ClausesSection),
        ("!", Tok.Negate),
        (r"\(", Tok.OpenP),
        (r"\)", Tok.CloseP),
        (",", Tok.Comma),
        (r"\w+", Tok.Identifier),
    )
]
"""
A mapping of compiled regexes to the tokens they identify.

Entries are ordered by priority: an entry earlier in the list should be
preferred over a later one, even when the later entry also matches.

Every unicode string should be matched by at least one entry.

NOTE(review): some characters (for example ``#``) are not matched by any
pattern in this table as written -- confirm how the lexer treats them.
"""