45 lines
1.2 KiB
Python
45 lines
1.2 KiB
Python
from enum import auto, IntEnum
|
|
from typing import Collection, Tuple
|
|
from re import compile, Pattern
|
|
|
|
class Tok(IntEnum):
    """
    The complete set of token kinds used in the grammar.

    Members are numbered by ``auto()`` in declaration order, starting at 1.
    """

    # Line breaks and horizontal spacing
    Newline = auto()
    Whitespace = auto()

    # Section markers
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()

    # Operators and punctuation
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()

    # Everything else
    Identifier = auto()
    Eof = auto()

    def __repr__(self) -> str:
        """Return only the member name (e.g. ``Newline``), without class or value."""
        return self.name
|
|
|
|
# Each pattern string below is compiled once, at import time; the resulting
# list keeps the pairs in exactly the order they are written here.
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(pattern_source), token)
    for pattern_source, token in (
        (r"\n", Tok.Newline),
        (r"[ \t]+", Tok.Whitespace),
        ("Predicates:", Tok.PredicateSection),
        ("Variables:", Tok.VariablesSection),
        ("Constants:", Tok.ConstantsSection),
        ("Functions:", Tok.FunctionsSection),
        ("Clauses:", Tok.ClausesSection),
        ("!", Tok.Negate),
        (r"\(", Tok.OpenP),
        (r"\)", Tok.CloseP),
        (",", Tok.Comma),
        (r"\w+", Tok.Identifier),
    )
]
|
|
"""
|
|
A mapping of regexes to the tokens they identify
|
|
|
|
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
|
|
on the list also matches. All unicode strings should be matched by at least one token.
|
|
""" |