JSON-Lang/grammar.py

300 lines
9.3 KiB
Python

"""
A grammar for parsing CNF files
If this module is run directly in python, it will spit out valid python which produces an
oracle table for the grammar it defines. It's recommended that this be done using
`build_oracle.sh` instead, however, which will build a whole python module containing the
oracle table, complete with imports.
"""
from emis_funky_funktions import *
from dataclasses import dataclass
from enum import auto, IntEnum
from re import compile, Pattern
from build_oracle import oracle_table
from ir import IRNeg, IRProp, IRTerm, IRVar, KnowledgeBase
from lex import Lexeme, tokenize
from parse import Action, parser
from tokens import *
from typing import Any, Callable, cast, Collection, Mapping, Sequence, Tuple, TypeAlias
class Variable(IntEnum):
    """
    The non-terminal symbols (grammar variables) used by the productions in this module.

    Members are numbered consecutively by `auto()` in declaration order, so the order of
    the members below must not be changed casually.
    """
    Start = auto()     # The whole input: four identifier sections followed by the clauses
    Idents = auto()    # A (possibly empty) run of identifiers
    Clauses = auto()   # A (possibly empty) list of clauses, each introduced by a newline
    Clauses_ = auto()  # Clauses': what may follow the newline which introduces a clause
    Clause = auto()    # A non-empty run of terms
    Clause_ = auto()   # Clause': the (possibly empty) remainder of a clause
    Term = auto()      # A negated term, or an identifier with an optional argument list
    Func = auto()      # Func?: an optional parenthesised argument list
    CSTerms = auto()   # The comma-separated tail of an argument list

    def __repr__(self) -> str:
        """Render the variable as its name wrapped in angle brackets, e.g. `<Start>`."""
        return '<' + self.name + '>'
# Any node of the syntax tree: either a negation (`ASTNegated`) or an identifier with an
# optional argument list (`ASTProp`).  Spelled as a string because both classes are only
# defined further down in this module.
ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
class IdentKind(IntEnum):
    """
    The kinds of thing an identifier can be declared as.

    Members are numbered consecutively by `auto()` in declaration order.
    """
    Function = auto()   # Must be given arguments when used as an object
    Constant = auto()   # An object which takes no arguments
    Variable = auto()   # A variable object; takes no arguments
    Predicate = auto()  # The only kind which is allowed where a proposition is expected
@dataclass(frozen=True)
class CallingNonFunc:
    """
    Semantic error: an identifier which is not a function was given an argument list.
    """
    # The lexeme of the identifier which was "called"
    term: Lexeme[Tok]
    # What kind of identifier `term` is actually bound as
    obj_type: IdentKind

    def __str__(self):
        """Describe the error, including the offending text and where it was found."""
        called = repr(self.term.matched_string)
        kind = self.obj_type.name.lower()
        location = f'{self.term.line}:{self.term.col_start}-{self.term.col_end}'
        return f'Semantic error: Attempted to call {called} (a {kind}) with arguments on line {location}'
@dataclass(frozen=True)
class MissingArguments:
    """
    Semantic error: a function identifier was used as an object without any arguments.
    """
    # The lexeme of the function identifier which lacks arguments
    term: Lexeme[Tok]

    def __str__(self):
        """Describe the error, including the offending text and where it was found."""
        func_name = repr(self.term.matched_string)
        location = f'{self.term.line}:{self.term.col_start}-{self.term.col_end}'
        return f'Semantic error: The function {func_name} on line {location} is missing arguments!'
@dataclass(frozen=True)
class UnidentifiedVariable:
    """
    Semantic error: an identifier was used which does not appear in any identifier list.
    """
    # The lexeme of the unknown identifier
    term: Lexeme[Tok]

    def __str__(self):
        """Describe the error, including the offending text and where it was found."""
        unknown = repr(self.term.matched_string)
        location = f'{self.term.line}:{self.term.col_start}-{self.term.col_end}'
        return f'Semantic error: Unidentified identifier {unknown} on line {location}'
@dataclass(frozen=True)
class PropUsedInObjectContext:
    """
    Semantic error: a predicate identifier appeared where an object was expected.
    """
    # The lexeme of the misplaced predicate identifier
    term: Lexeme[Tok]

    def __str__(self):
        """Describe the error, including the offending text and where it was found."""
        prop_name = repr(self.term.matched_string)
        location = f'{self.term.line}:{self.term.col_start}-{self.term.col_end}'
        return f'Semantic error: The proposition {prop_name} was used in a context where an object was expected on line {location}'
@dataclass(frozen=True)
class ObjectUsedInPropContext:
    """
    Semantic error: a non-predicate identifier appeared where a proposition was expected.
    """
    # The lexeme of the misplaced identifier
    term: Lexeme[Tok]
    # What kind of identifier `term` is actually bound as
    obj_type: IdentKind

    def __str__(self):
        """Describe the error, including the offending text and where it was found."""
        kind = self.obj_type.name.lower()
        obj_name = repr(self.term.matched_string)
        location = f'{self.term.line}:{self.term.col_start}-{self.term.col_end}'
        return f'Semantic error: The {kind} {obj_name} was used in a context where a proposition was expected on line {location}'
@dataclass(frozen=True)
class NegationOfObject:
    """
    Semantic error: a negation appeared where an object (not a proposition) was expected.
    """
    # Line on which the negation was found
    line: int
    # Column at which the negation was found
    col: int

    def __str__(self):
        """Describe the error along with the `line:col` position it occurred at."""
        position = '{}:{}'.format(self.line, self.col)
        return 'Semantic error: Attempted to use negation in a context where working on objects on line ' + position
# Every error which lowering the syntax tree into IR can report
GenIrError: TypeAlias = (
    CallingNonFunc
    | MissingArguments
    | UnidentifiedVariable
    | PropUsedInObjectContext
    | ObjectUsedInPropContext
    | NegationOfObject
)
@dataclass(frozen=True)
class IdentBindings:
    """
    The identifiers declared by a file, grouped by the kind they were declared as.

    The field order matters: instances are built positionally, one identifier list at a
    time, in the order the fields are listed here.
    """
    # Names declared as predicates
    predicate_idents: Sequence[str]

    # Names declared as variables
    variable_idents: Sequence[str]

    # Names declared as constants
    const_idents: Sequence[str]

    # Names declared as functions
    func_idents: Sequence[str]
@dataclass(frozen=True)
class ASTNegated:
    """
    Syntax tree node for a negation applied to another term.
    """
    # The lexeme of the negation symbol itself; used to locate errors
    neg_lexeme: Lexeme[Tok]
    # The term being negated
    term: ASTTerm

    def make_ir(self, idents: IdentBindings, is_prop: bool) -> 'Result[IRTerm, GenIrError]':
        """
        Lower this negation into IR.

        :param idents: The identifiers declared by the file being processed
        :param is_prop: Whether this term sits where a proposition is expected
        :returns: The lowered inner term wrapped with `IRNeg`, an error produced while
            lowering the inner term, or `NegationOfObject` if `is_prop` is false
        """
        # Negation only makes sense on propositions, never on objects
        if not is_prop:
            return Err(NegationOfObject(self.neg_lexeme.line, self.neg_lexeme.col_start))
        inner = self.term.make_ir(idents, True)
        return map_res(IRNeg, inner)
@dataclass(frozen=True)
class ASTProp:
    """
    Syntax tree node for an identifier together with its (possibly empty) argument list.
    """
    # The lexeme of the identifier
    ident: Lexeme[Tok]
    # The terms passed as arguments, if any
    arguments: Sequence[ASTTerm]

    def make_ir(self, idents: IdentBindings, is_pred: bool) -> 'Result[IRTerm, GenIrError]':
        """
        Lower this identifier (and its arguments) into IR.

        :param idents: The identifiers declared by the file being processed
        :param is_pred: Whether this term sits where a proposition is expected, as opposed
            to a position where an object is expected
        :returns: An `IRVar` for variables and an `IRProp` for everything else, or the
            first semantic error which was found
        """
        name = self.ident.matched_string

        # The first list containing the name decides its kind, checked in this order
        lookup_order = (
            (IdentKind.Predicate, idents.predicate_idents),
            (IdentKind.Variable, idents.variable_idents),
            (IdentKind.Constant, idents.const_idents),
            (IdentKind.Function, idents.func_idents),
        )
        bound_type = next((kind for kind, names in lookup_order if name in names), None)
        if bound_type is None:
            return Err(UnidentifiedVariable(self.ident))

        if is_pred:
            # Only predicates may stand where a proposition is expected
            if bound_type != IdentKind.Predicate:
                return Err(ObjectUsedInPropContext(self.ident, bound_type))
        elif bound_type == IdentKind.Predicate:
            return Err(PropUsedInObjectContext(self.ident))
        elif bound_type == IdentKind.Function:
            # A function used as an object needs at least one argument
            if len(self.arguments) == 0:
                return Err(MissingArguments(self.ident))
        elif len(self.arguments) > 0:
            # Variables and constants cannot take arguments
            return Err(CallingNonFunc(self.ident, bound_type))

        if bound_type == IdentKind.Variable:
            return Ok(IRVar(name))

        # Arguments are always lowered as objects.
        # NOTE(review): `sequence` presumably gathers the per-argument results into one
        # result, and `<=` presumably maps a function over it -- confirm against
        # `emis_funky_funktions`.
        lowered_args = sequence([arg.make_ir(idents, False) for arg in self.arguments])
        as_tuple = cast(Callable[[Iterable[IRTerm]], Tuple[IRTerm, ...]], tuple)
        # The parentheses are required: without them python would chain the comparisons
        return (lowered_args <= as_tuple) <= p(IRProp, name)
@cur2
def make_ir(
    idents: IdentBindings,
    clauses: Sequence[Sequence[ASTTerm]],
) -> Result[KnowledgeBase, GenIrError]:
    """
    Lower a whole list of clauses into a knowledge base.

    Every term of every clause is lowered as a proposition.

    NOTE(review): `sequence` presumably merges the individual results, surfacing an error
    if any term failed to lower -- confirm against `emis_funky_funktions`.

    :param idents: The identifiers declared by the file being processed
    :param clauses: The parsed clauses, each one a sequence of terms
    :returns: An `FSet` of `FSet`s of lowered terms, or a semantic error
    """
    def to_knowledge_base(kb_):
        return FSet(FSet(clause) for clause in kb_)

    lowered_clauses = [
        sequence([term.make_ir(idents, True) for term in clause])
        for clause in clauses
    ]
    return map_res(to_knowledge_base, sequence(lowered_clauses))
def cons(stack: Sequence[Any]) -> Sequence[Any]:
match stack:
case [rest, head, *popped_stack]:
return ((head, *rest), *popped_stack)
case bad_stack:
raise Exception("Unexpected stack state!", bad_stack)
# The empty list: the starting point onto which `cons` prepends elements
nil: Sequence[Any] = ()
@cur2
def introduce(
    cons: Any,
    stack: Sequence[Any]
) -> Sequence[Any]:
    """
    Push a fixed value onto the top of the stack.

    :param cons: The value to push (note that this parameter shadows the `cons` function)
    :param stack: The stack to push onto
    :returns: A new tuple with the value followed by every entry of `stack`
    """
    pushed = (cons,)
    return pushed + tuple(stack)
def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
match stack:
case [arg, func, *popped_stack] if hasattr(func, '__call__'):
return (func(arg), *popped_stack)
raise Exception("Unexpected stack state!", stack)
@cur2
def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
match stack:
case [arg, *popped_stack]:
return (func(arg), *popped_stack)
case bad_stack:
raise Exception("Unexpected stack state!", bad_stack)
def drop(stack: Sequence[Any]) -> Sequence[Any]:
    """
    Discard the top entry of the stack.

    An empty stack is returned unchanged rather than treated as an error.
    """
    remaining = stack[1:]
    return remaining
# The productions of the grammar, interleaved with the actions which build the result.
#
# Each entry pairs a left-hand variable with one right-hand side.  A right-hand side mixes
# terminals (`Tok`), non-terminals (`Variable`), and actions: the stack-transforming
# functions `drop`, `cons` and `f_apply`, plus the functions produced by `introduce(...)`
# and `call_func(...)`.  A variable with several alternatives simply appears several times.
#
# NOTE(review): presumably the parser pushes each matched lexeme onto the stack and runs
# each action at the point where it appears in the production -- confirm against
# `parse.parser`.
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
# NOTE(review): `p(p,p,p,IdentBindings)` looks like it turns `IdentBindings` into a function
# which accepts its four identifier lists one at a time; the first list is supplied by
# `call_func` and the remaining three by the `f_apply`s which follow -- confirm against `p`.
# `call_func(make_ir)` then supplies the finished bindings to `make_ir`, and the final
# `f_apply` supplies the clauses.
(Variable.Start,
[ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,IdentBindings)), Tok.Newline, drop
, Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
, Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
, Tok.FunctionsSection, drop, Variable.Idents, f_apply, call_func(make_ir), Tok.Newline, drop
, Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),
# Identifier lists keep only the matched text of each identifier
(Variable.Idents,
[ Tok.Identifier, call_func(lambda i: i.matched_string), Variable.Idents, cons ]),
(Variable.Idents,
[ introduce(nil) ]),
# Each clause is introduced by a newline; the newline lexeme itself is discarded
(Variable.Clauses,
[ Tok.Newline, drop, Variable.Clauses_ ]),
(Variable.Clauses,
[ introduce(nil) ]),
(Variable.Clauses_,
[ Variable.Clause, Variable.Clauses, cons ]),
(Variable.Clauses_,
[ introduce(nil) ]),
# A clause is one or more terms
(Variable.Clause,
[ Variable.Term, Variable.Clause_, cons ]),
(Variable.Clause_,
[ Variable.Clause ]),
(Variable.Clause_,
[ introduce(nil) ]),
# Terms: the node constructor is curried with `cur2`, given the lexeme first, and then
# applied to the sub-term (for negations) or to the argument list (for identifiers)
(Variable.Term,
[ Tok.Negate, call_func(cur2(ASTNegated)), Variable.Term, f_apply ]),
(Variable.Term,
[ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),
# Optional argument lists; an absent list is represented by `nil`
(Variable.Func,
[ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
(Variable.Func,
[ introduce(nil) ]),
(Variable.CSTerms,
[ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
(Variable.CSTerms,
[ introduce(nil) ]),
]
"""
Implements the following grammar:
Start := PredicateSection <Idents> Newline
VariablesSection <Idents> Newline
ConstantsSection <Idents> Newline
FunctionsSection <Idents> Newline
ClausesSection <Clauses> Eof
Idents := Identifier <Idents>
:= ε
Clauses := Newline <Clauses'>
:= ε
Clauses' := <Clause> <Clauses>
:= ε
Clause := <Term> <Clause'>
Clause' := <Clause>
:= ε
Term := Negate <Term>
:= Identifier <Func?>
Func? := OpenP <Term> <CSTerms> CloseP
:= ε
CSTerms := Comma <Term> <CSTerms>
:= ε
"""
def lex_and_parse(input: str) -> Result[Result[KnowledgeBase, GenIrError], Tuple[Lexeme[Tok], Collection[Tok]]]:
lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, input))
oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
match parser_(lexemes):
case Ok([Ok(ir)]):
return Ok(Ok(ir))
case Ok([Err(err)]):
return Ok(Err(err))
case Ok(huh):
raise Exception('Unexpected end result: ', huh)
case Err(e_tup):
return Err(e_tup) #type:ignore
raise Exception('Unreachable')
if __name__ == '__main__':
# from emis_funky_funktions import cur2, flip
# from build_oracle import print_oracle_table_enum, oracle_table
# print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
import sys
if len(sys.argv) == 2:
with open(sys.argv[1]) as file:
match lex_and_parse(file.read()):
case Ok(Ok(ir)):
print('\n'.join(' or '.join(str(t) for t in c) for c in ir))
case Ok(Err(err)):
print(err)
exit(102)
case Err((Lexeme(token, text, line, col_start, col_end), expected)):
print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
exit(101)
else:
print(f'Usage: python {sys.argv[0]} <cnf-file-to-parse>')
exit(100)