JSON-Lang/grammar.py

"""
A grammar for parsing CNF files

If this module is run directly in python, it will spit out valid python which produces an
oracle table for the grammar it defines.  It's recommended that this be done using
`build_oracle.sh` instead, however, which will build a whole python module containing the
oracle table, complete with imports.
"""
from enum import auto, IntEnum
from re import compile, Pattern

from typing import Collection, Mapping, Sequence, Tuple

class Tok(IntEnum):
	"""
	All possible tokens used in the grammar
	"""
	Newline = auto()
	Whitespace = auto()
	PredicateSection = auto()
	VariablesSection = auto()
	ConstantsSection = auto()
	FunctionsSection = auto()
	ClausesSection = auto()
	Negate = auto()
	OpenP = auto()
	CloseP = auto()
	Comma = auto()
	Identifier = auto()
	Eof = auto()

	def __repr__(self):
		return self._name_

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
	(compile(r"\n"), Tok.Newline),
	(compile(r"[ \t]+"), Tok.Whitespace),
	(compile("Predicates:"), Tok.PredicateSection),
	(compile("Variables:"), Tok.VariablesSection),
	(compile("Constants:"), Tok.ConstantsSection),
	(compile("Functions:"), Tok.FunctionsSection),
	(compile("Clauses:"), Tok.ClausesSection),
	(compile("!"), Tok.Negate),
	(compile(r"\("), Tok.OpenP),
	(compile(r"\)"), Tok.CloseP),
	(compile(","), Tok.Comma),
	(compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexs to the tokens the identify

Tokens earlier on in the list should be regarded as higher priority, even if a match lower
on the list also matches.  All unicode strings should be matched by at least one token.
"""

class Variable(IntEnum):
	Start = auto()
	Idents = auto()
	Clauses = auto()
	Clauses_ = auto()
	Clause = auto()
	Clause_ = auto()
	Term = auto()
	Func = auto()
	CSTerms = auto()

	def __repr__(self) -> str:
		return f'<{self._name_}>'

GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
	(Variable.Start,
		 [ Tok.PredicateSection, Variable.Idents, Tok.Newline
		 , Tok.VariablesSection, Variable.Idents, Tok.Newline
		 , Tok.ConstantsSection, Variable.Idents, Tok.Newline
		 , Tok.FunctionsSection, Variable.Idents, Tok.Newline
		 , Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ),

	(Variable.Idents,
		[ Tok.Identifier, Variable.Idents ]),
	(Variable.Idents,
		[ ]),

	(Variable.Clauses,
		[ Tok.Newline, Variable.Clauses_ ]),
	(Variable.Clauses,
		[ ]),

	(Variable.Clauses_,
		[ Variable.Clause, Variable.Clauses ]),
	(Variable.Clauses_,
		[ ]),

	(Variable.Clause,
		[ Variable.Term, Variable.Clause_ ]),

	(Variable.Clause_,
		[ Variable.Clause ]),
	(Variable.Clause_,
		[ ]),

	(Variable.Term,
		[ Tok.Negate, Variable.Term ]),
	(Variable.Term,
		[ Tok.Identifier, Variable.Func ]),

	(Variable.Func,
		[ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]),
	(Variable.Func,
		[ ]),

	(Variable.CSTerms,
		[ Tok.Comma, Variable.Term, Variable.CSTerms ]),
	(Variable.CSTerms,
		[ ]),
]
"""
Implements the following grammar:

Start	 := PredicateSection <Idents> Newline
			VariablesSection <Idents> Newline
			ConstantsSection <Idents> Newline
			FunctionsSection <Idents> Newline
			ClausesSection <Clauses> Eof

Idents	 := Identifier <Idents>
		 := ε

Clauses  := Newline <Clauses'>
		 := ε

Clauses' := <Clause> <Clauses>
		 := ε

Clause	 := <Term> <Clause'>

Clause'  := <Clause>
		 := ε

Term	 := Negate <Term>
		 := Identifier <Func?>

Func?	 := OpenP <Term> <CSTerms> CloseP
		 := ε

CSTerms  := Comma <Term> <CSTerms>
		 := ε
"""

if __name__ == '__main__':
	from emis_funky_funktions import cur2, flip
	from build_oracle import print_oracle_table_enum, oracle_table
	print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore