JSON-Lang/grammar.py

139 lines
3 KiB
Python

from enum import auto, IntEnum
from re import compile, Pattern
from typing import Collection, Mapping, Sequence, Tuple
class Tok(IntEnum):
"""
All possible tokens used in the grammar
"""
Newline = auto()
Whitespace = auto()
PredicateSection = auto()
VariablesSection = auto()
ConstantsSection = auto()
FunctionsSection = auto()
ClausesSection = auto()
Negate = auto()
OpenP = auto()
CloseP = auto()
Comma = auto()
Identifier = auto()
Eof = auto()
def __repr__(self):
return self._name_
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
(compile(r"\n"), Tok.Newline),
(compile(r"[ \t]+"), Tok.Whitespace),
(compile("Predicates:"), Tok.PredicateSection),
(compile("Variables:"), Tok.VariablesSection),
(compile("Constants:"), Tok.ConstantsSection),
(compile("Functions:"), Tok.FunctionsSection),
(compile("Clauses:"), Tok.ClausesSection),
(compile("!"), Tok.Negate),
(compile(r"\("), Tok.OpenP),
(compile(r"\)"), Tok.CloseP),
(compile(","), Tok.Comma),
(compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexs to the tokens the identify
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
on the list also matches. All unicode strings should be matched by at least one token.
"""
class Variable(IntEnum):
Start = auto()
Idents = auto()
Clauses = auto()
Clauses_ = auto()
Clause = auto()
Clause_ = auto()
Term = auto()
Func = auto()
CSTerms = auto()
def __repr__(self) -> str:
return f'<{self._name_}>'
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
(Variable.Start,
[ Tok.PredicateSection, Variable.Idents, Tok.Newline
, Tok.VariablesSection, Variable.Idents, Tok.Newline
, Tok.ConstantsSection, Variable.Idents, Tok.Newline
, Tok.FunctionsSection, Variable.Idents, Tok.Newline
, Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ),
(Variable.Idents,
[ Tok.Identifier, Variable.Idents ]),
(Variable.Idents,
[ ]),
(Variable.Clauses,
[ Tok.Newline, Variable.Clauses_ ]),
(Variable.Clauses,
[ ]),
(Variable.Clauses_,
[ Variable.Clause, Variable.Clauses ]),
(Variable.Clauses_,
[ ]),
(Variable.Clause,
[ Variable.Term, Variable.Clause_ ]),
(Variable.Clause_,
[ Variable.Clause ]),
(Variable.Clause_,
[ ]),
(Variable.Term,
[ Tok.Negate, Variable.Term ]),
(Variable.Term,
[ Tok.Identifier, Variable.Func ]),
(Variable.Func,
[ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]),
(Variable.Func,
[ ]),
(Variable.CSTerms,
[ Tok.Comma, Variable.Term, Variable.CSTerms ]),
(Variable.CSTerms,
[ ]),
]
"""
Implements the following grammar:
Start := PredicateSection <Idents> Newline
VariablesSection <Idents> Newline
ConstantsSection <Idents> Newline
FunctionsSection <Idents> Newline
ClausesSection <Clauses> Eof
Idents := Identifier <Idents>
:= ε
Clauses := Newline <Clauses'>
:= ε
Clauses' := <Clause> <Clauses>
:= ε
Clause := <Term> <Clause'>
Clause' := <Clause>
:= ε
Term := Negate <Term>
:= Identifier <Func?>
Func? := OpenP <Term> <CSTerms> CloseP
:= ε
CSTerms := Comma <Term> <CSTerms>
:= ε
"""