139 lines
3 KiB
Python
139 lines
3 KiB
Python
|
from enum import auto, IntEnum
|
||
|
from re import compile, Pattern
|
||
|
|
||
|
from typing import Collection, Mapping, Sequence, Tuple
|
||
|
|
||
|
class Tok(IntEnum):
    """
    All possible tokens used in the grammar.

    Members are numbered by ``auto()`` in declaration order, so the integer
    values follow the order in which the names appear below.
    """

    # Layout
    Newline = auto()
    Whitespace = auto()

    # Section headers
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()

    # Operators and punctuation
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()

    # Names
    Identifier = auto()

    # NOTE(review): no regex in LEX_TABLE yields this token; presumably the
    # lexer emits it once the input is exhausted -- confirm against the lexer.
    Eof = auto()

    def __repr__(self) -> str:
        """Return the bare member name, e.g. ``Newline``."""
        return self.name
|
||
|
|
||
|
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(regex), token)
    for regex, token in (
        (r"\n", Tok.Newline),
        (r"[ \t]+", Tok.Whitespace),
        ("Predicates:", Tok.PredicateSection),
        ("Variables:", Tok.VariablesSection),
        ("Constants:", Tok.ConstantsSection),
        ("Functions:", Tok.FunctionsSection),
        ("Clauses:", Tok.ClausesSection),
        ("!", Tok.Negate),
        (r"\(", Tok.OpenP),
        (r"\)", Tok.CloseP),
        (",", Tok.Comma),
        (r"\w+", Tok.Identifier),
    )
]
"""
A mapping of compiled regexes to the tokens they identify.

Entries earlier in the list should be regarded as higher priority, even if an
entry lower in the list also matches. This is why the section keywords are
listed before the generic identifier pattern.

NOTE(review): the intent is that every unicode string is matched by at least
one entry, but no catch-all pattern is present in this table -- confirm how
the lexer treats input that matches none of them.
"""
|
||
|
|
||
|
class Variable(IntEnum):
    """
    Non-terminal symbols of the grammar.

    Names ending in an underscore stand for the primed variants written in
    the grammar description (``Clauses_`` is ``Clauses'``, ``Clause_`` is
    ``Clause'``).
    """

    Start = auto()
    Idents = auto()

    Clauses = auto()
    Clauses_ = auto()

    Clause = auto()
    Clause_ = auto()

    Term = auto()
    Func = auto()
    CSTerms = auto()

    def __repr__(self) -> str:
        """Return the member name wrapped in angle brackets, e.g. ``<Term>``."""
        return f"<{self.name}>"
|
||
|
|
||
|
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
    # Start: the five sections in fixed order; the first four end with a
    # Newline and the clauses section runs until Eof.
    (Variable.Start, [
        Tok.PredicateSection, Variable.Idents, Tok.Newline,
        Tok.VariablesSection, Variable.Idents, Tok.Newline,
        Tok.ConstantsSection, Variable.Idents, Tok.Newline,
        Tok.FunctionsSection, Variable.Idents, Tok.Newline,
        Tok.ClausesSection, Variable.Clauses, Tok.Eof,
    ]),

    # Idents: zero or more identifiers.
    (Variable.Idents, [Tok.Identifier, Variable.Idents]),
    (Variable.Idents, []),

    # Clauses / Clauses': newline-introduced clauses; a newline may also be
    # followed by nothing at all.
    (Variable.Clauses, [Tok.Newline, Variable.Clauses_]),
    (Variable.Clauses, []),

    (Variable.Clauses_, [Variable.Clause, Variable.Clauses]),
    (Variable.Clauses_, []),

    # Clause / Clause': one or more terms in a row.
    (Variable.Clause, [Variable.Term, Variable.Clause_]),

    (Variable.Clause_, [Variable.Clause]),
    (Variable.Clause_, []),

    # Term: any number of negations, then an identifier with an optional
    # parenthesised argument list.
    (Variable.Term, [Tok.Negate, Variable.Term]),
    (Variable.Term, [Tok.Identifier, Variable.Func]),

    # Func?: the argument list needs its first Term before the comma-separated
    # tail. CSTerms on its own only derives ", Term" repetitions, so without
    # the leading Term a call such as f(a) could never be parsed.
    (Variable.Func, [Tok.OpenP, Variable.Term, Variable.CSTerms, Tok.CloseP]),
    (Variable.Func, []),

    # CSTerms: zero or more ", Term" continuations.
    (Variable.CSTerms, [Tok.Comma, Variable.Term, Variable.CSTerms]),
    (Variable.CSTerms, []),
]
"""
Production rules as ``(head, body)`` pairs; an empty body is an ε-production.

Implements the following grammar:

    Start    := PredicateSection <Idents> Newline
                VariablesSection <Idents> Newline
                ConstantsSection <Idents> Newline
                FunctionsSection <Idents> Newline
                ClausesSection <Clauses> Eof

    Idents   := Identifier <Idents>
             := ε

    Clauses  := Newline <Clauses'>
             := ε

    Clauses' := <Clause> <Clauses>
             := ε

    Clause   := <Term> <Clause'>

    Clause'  := <Clause>
             := ε

    Term     := Negate <Term>
             := Identifier <Func?>

    Func?    := OpenP <Term> <CSTerms> CloseP
             := ε

    CSTerms  := Comma <Term> <CSTerms>
             := ε
"""
|