# JSON-Lang/grammar.py

from enum import auto, IntEnum
from re import compile, Pattern

from typing import Collection, Mapping, Sequence, Tuple

class Tok(IntEnum):
	"""
	Enumerates every token kind used by the grammar.

	Values are assigned with auto(), so they follow declaration order.
	"""
	# Layout
	Newline = auto()
	Whitespace = auto()

	# Section headers
	PredicateSection = auto()
	VariablesSection = auto()
	ConstantsSection = auto()
	FunctionsSection = auto()
	ClausesSection = auto()

	# Punctuation and operators
	Negate = auto()
	OpenP = auto()
	CloseP = auto()
	Comma = auto()

	# Names
	Identifier = auto()

	# End-of-input marker
	Eof = auto()

	def __repr__(self) -> str:
		# Show only the member name rather than the default enum repr
		return self.name

LEX_TABLE: Sequence[Tuple[Pattern[str], Tok]] = [
	# Layout
	(compile(r"\n"), Tok.Newline),
	(compile(r"[ \t]+"), Tok.Whitespace),
	# Section headers
	(compile(r"Predicates:"), Tok.PredicateSection),
	(compile(r"Variables:"), Tok.VariablesSection),
	(compile(r"Constants:"), Tok.ConstantsSection),
	(compile(r"Functions:"), Tok.FunctionsSection),
	(compile(r"Clauses:"), Tok.ClausesSection),
	# Punctuation and operators
	(compile(r"!"), Tok.Negate),
	(compile(r"\("), Tok.OpenP),
	(compile(r"\)"), Tok.CloseP),
	(compile(r","), Tok.Comma),
	# Names
	(compile(r"\w+"), Tok.Identifier),
]
"""
An ordered table pairing each regex with the token it identifies.

Entries earlier in the list should be regarded as higher priority, even if a pattern lower
in the list also matches.  Because that order is significant, the table is typed as a
Sequence rather than an unordered Collection.

All unicode strings should be matched by at least one token.

NOTE(review): no pattern in this table matches characters such as "@" or a carriage
return, so the requirement above is not met by the table alone -- confirm how the lexer
handles unmatched input.  Tok.Eof has no pattern here either; presumably the lexer emits
it at end of input -- confirm.
"""

class Variable(IntEnum):
	"""
	Non-terminal symbols of the grammar.

	A trailing underscore stands for the prime in the grammar notation,
	e.g. Clauses_ is the non-terminal written Clauses'.
	"""
	Start = auto()
	Idents = auto()
	Clauses = auto()
	Clauses_ = auto()
	Clause = auto()
	Clause_ = auto()
	Term = auto()
	Func = auto()
	CSTerms = auto()

	def __repr__(self) -> str:
		# Angle brackets set non-terminals apart from tokens when printed
		return "<" + self.name + ">"

GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
	# Start: the five sections in fixed order; the first four are closed by a Newline,
	# the clause section runs to Eof.
	(Variable.Start, [
		Tok.PredicateSection, Variable.Idents, Tok.Newline,
		Tok.VariablesSection, Variable.Idents, Tok.Newline,
		Tok.ConstantsSection, Variable.Idents, Tok.Newline,
		Tok.FunctionsSection, Variable.Idents, Tok.Newline,
		Tok.ClausesSection, Variable.Clauses, Tok.Eof,
	]),

	# Idents: zero or more identifiers.
	(Variable.Idents, [Tok.Identifier, Variable.Idents]),
	(Variable.Idents, []),

	# Clauses / Clauses': (Newline Clause)* with an optional trailing Newline.
	(Variable.Clauses, [Tok.Newline, Variable.Clauses_]),
	(Variable.Clauses, []),

	(Variable.Clauses_, [Variable.Clause, Variable.Clauses]),
	(Variable.Clauses_, []),

	# Clause / Clause': one or more Terms in a row.
	(Variable.Clause, [Variable.Term, Variable.Clause_]),

	(Variable.Clause_, [Variable.Clause]),
	(Variable.Clause_, []),

	# Term: any number of negations, then an identifier with an optional argument list.
	(Variable.Term, [Tok.Negate, Variable.Term]),
	(Variable.Term, [Tok.Identifier, Variable.Func]),

	# Func: optional parenthesised argument list.  CSTerms only derives the
	# ", Term" continuations, so the first argument must be an explicit Term here;
	# without it "f(x)" is rejected and "f(,x)" is accepted.
	(Variable.Func, [Tok.OpenP, Variable.Term, Variable.CSTerms, Tok.CloseP]),
	(Variable.Func, []),

	# CSTerms: zero or more ", Term" continuations.
	(Variable.CSTerms, [Tok.Comma, Variable.Term, Variable.CSTerms]),
	(Variable.CSTerms, []),
]
"""
Implements the following grammar (each entry is one production: a non-terminal and the
sequence it expands to; an empty sequence is ε):

Start    := PredicateSection <Idents> Newline
            VariablesSection <Idents> Newline
            ConstantsSection <Idents> Newline
            FunctionsSection <Idents> Newline
            ClausesSection <Clauses> Eof

Idents   := Identifier <Idents>
         := ε

Clauses  := Newline <Clauses'>
         := ε

Clauses' := <Clause> <Clauses>
         := ε

Clause   := <Term> <Clause'>

Clause'  := <Clause>
         := ε

Term     := Negate <Term>
         := Identifier <Func?>

Func?    := OpenP <Term> <CSTerms> CloseP
         := ε

CSTerms  := Comma <Term> <CSTerms>
         := ε

In the code, Clauses', Clause' and Func? are spelled Variable.Clauses_, Variable.Clause_
and Variable.Func respectively.
"""