300 lines
9.3 KiB
Python
300 lines
9.3 KiB
Python
"""
A grammar for parsing CNF files

If this module is run directly in python, it will spit out valid python which produces an
oracle table for the grammar it defines.  It's recommended that this be done using
`build_oracle.sh` instead, however, which will build a whole python module containing the
oracle table, complete with imports.

NOTE: the oracle-printing code in the `__main__` block is currently commented out.  As
written, running this module directly instead parses the CNF file named on the command line
and prints its clauses.
"""
|
|
from emis_funky_funktions import *
|
|
|
|
from dataclasses import dataclass
|
|
from enum import auto, IntEnum
|
|
from re import compile, Pattern
|
|
|
|
from build_oracle import oracle_table
|
|
from ir import IRNeg, IRProp, IRTerm, IRVar, KnowledgeBase
|
|
from lex import Lexeme, tokenize
|
|
from parse import Action, parser
|
|
from tokens import *
|
|
|
|
from typing import Any, Callable, cast, Collection, Mapping, Sequence, Tuple, TypeAlias
|
|
|
|
|
|
class Variable(IntEnum):
    """Non-terminal symbols of the CNF grammar.

    Members are numbered automatically in declaration order, starting at 1.
    """

    # Entry point of the grammar
    Start = auto()
    # A (possibly empty) run of identifiers
    Idents = auto()
    # Written Clauses / Clauses' in the grammar description
    Clauses = auto()
    Clauses_ = auto()
    # Written Clause / Clause' in the grammar description
    Clause = auto()
    Clause_ = auto()
    Term = auto()
    # Written Func? in the grammar description
    Func = auto()
    CSTerms = auto()

    def __repr__(self) -> str:
        """Render the non-terminal as its name wrapped in angle brackets."""
        return '<{}>'.format(self.name)
|
|
|
|
# Either kind of AST node.  Written as a string (forward reference) because both
# classes are only defined further down in this module.
ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
|
|
|
|
class IdentKind(IntEnum):
    """The kinds of thing an identifier can be bound to.

    The lower-cased member names are interpolated into user-facing semantic
    error messages, so renaming a member changes those messages.
    """
    Function = auto()
    Constant = auto()
    Variable = auto()
    Predicate = auto()
|
|
|
|
@dataclass(frozen=True)
class CallingNonFunc:
    """Semantic error: an identifier that is not a function was given arguments."""

    # Lexeme of the identifier that was applied to arguments
    term: Lexeme[Tok]
    # What the identifier is actually bound to
    obj_type: IdentKind

    def __str__(self):
        """Format the error with the offending text, its kind and its source position."""
        lexeme = self.term
        kind_name = self.obj_type.name.lower()
        return f'Semantic error: Attempted to call {repr(lexeme.matched_string)} (a {kind_name}) with arguments on line {lexeme.line}:{lexeme.col_start}-{lexeme.col_end}'
|
|
|
|
@dataclass(frozen=True)
class MissingArguments:
    """Semantic error: a function identifier was used without any arguments."""

    # Lexeme of the function identifier that is missing its argument list
    term: Lexeme[Tok]

    def __str__(self):
        """Format the error with the function's text and its source position."""
        lexeme = self.term
        return f'Semantic error: The function {repr(lexeme.matched_string)} on line {lexeme.line}:{lexeme.col_start}-{lexeme.col_end} is missing arguments!'
|
|
|
|
@dataclass(frozen=True)
class UnidentifiedVariable:
    """Semantic error: an identifier was used that none of the binding lists contain."""

    # Lexeme of the unknown identifier
    term: Lexeme[Tok]

    def __str__(self):
        """Format the error with the identifier's text and its source position."""
        lexeme = self.term
        return f'Semantic error: Unidentified identifier {repr(lexeme.matched_string)} on line {lexeme.line}:{lexeme.col_start}-{lexeme.col_end}'
|
|
|
|
@dataclass(frozen=True)
class PropUsedInObjectContext:
    """Semantic error: a predicate identifier appeared where an object was expected."""

    # Lexeme of the misplaced predicate identifier
    term: Lexeme[Tok]

    def __str__(self):
        """Format the error with the proposition's text and its source position."""
        lexeme = self.term
        return f'Semantic error: The proposition {repr(lexeme.matched_string)} was used in a context where an object was expected on line {lexeme.line}:{lexeme.col_start}-{lexeme.col_end}'
|
|
|
|
@dataclass(frozen=True)
class ObjectUsedInPropContext:
    """Semantic error: a non-predicate identifier appeared where a proposition was expected."""

    # Lexeme of the misplaced identifier
    term: Lexeme[Tok]
    # What the identifier is actually bound to
    obj_type: IdentKind

    def __str__(self):
        """Format the error with the identifier's kind, text and source position."""
        lexeme = self.term
        kind_name = self.obj_type.name.lower()
        return f'Semantic error: The {kind_name} {repr(lexeme.matched_string)} was used in a context where a proposition was expected on line {lexeme.line}:{lexeme.col_start}-{lexeme.col_end}'
|
|
|
|
@dataclass(frozen=True)
class NegationOfObject:
    """Semantic error: a negation appeared where an object was expected."""

    # Line of the negation operator
    line: int
    # Starting column of the negation operator
    col: int

    def __str__(self):
        """Format the error with the position of the offending negation."""
        position = f'{self.line}:{self.col}'
        return 'Semantic error: Attempted to use negation in a context where working on objects on line ' + position
|
|
|
|
# Union of every semantic error that lowering the AST to IR can report.
GenIrError: TypeAlias = CallingNonFunc | MissingArguments | UnidentifiedVariable | PropUsedInObjectContext | ObjectUsedInPropContext | NegationOfObject
|
|
|
|
@dataclass(frozen=True)
class IdentBindings:
    """The identifier names known to the IR generator, grouped by kind.

    NOTE(review): the field order looks load-bearing -- GRAMMAR's Start rule
    appears to fill these positionally, one section at a time, via
    `p(p,p,p,IdentBindings)`.  Confirm before reordering.
    """
    # Names that resolve to IdentKind.Predicate
    predicate_idents: Sequence[str]
    # Names that resolve to IdentKind.Variable
    variable_idents: Sequence[str]
    # Names that resolve to IdentKind.Constant
    const_idents: Sequence[str]
    # Names that resolve to IdentKind.Function
    func_idents: Sequence[str]
|
|
|
|
@dataclass(frozen=True)
class ASTNegated:
    """AST node for a negated term."""

    # Lexeme of the negation operator; supplies the position for error reports
    neg_lexeme: Lexeme[Tok]
    # The term being negated
    term: ASTTerm

    def make_ir(self, idents: IdentBindings, is_prop: bool) -> 'Result[IRTerm, GenIrError]':
        """Lower this negation to IR.

        When `is_prop` is false the negation sits where an object is expected,
        so a `NegationOfObject` error is returned.  Otherwise the inner term is
        lowered as a proposition and a successful result is wrapped in `IRNeg`.
        """
        if not is_prop:
            return Err(NegationOfObject(self.neg_lexeme.line, self.neg_lexeme.col_start))

        lowered_inner = self.term.make_ir(idents, True)
        return map_res(IRNeg, lowered_inner)
|
|
|
|
@dataclass(frozen=True)
class ASTProp:
    """AST node for an identifier, optionally applied to a list of argument terms."""

    # Lexeme of the identifier itself
    ident: Lexeme[Tok]
    # Argument terms; empty when the identifier was written without parentheses
    arguments: Sequence[ASTTerm]

    def make_ir(self, idents: IdentBindings, is_pred: bool) -> 'Result[IRTerm, GenIrError]':
        """Lower this node to IR, checking that the identifier is used consistently.

        `is_pred` says whether a proposition (True) or an object (False) is
        expected at this position.  Returns an `Err` holding the relevant
        semantic error when the identifier is unknown or of the wrong kind for
        the position, or when it is called / not called inappropriately.
        Variables lower to `IRVar`; everything else lowers to `IRProp` with its
        arguments lowered as objects.
        """
        name = self.ident.matched_string

        # Resolve the identifier's kind; the first binding list containing it wins.
        if name in idents.predicate_idents:
            bound_type = IdentKind.Predicate
        elif name in idents.variable_idents:
            bound_type = IdentKind.Variable
        elif name in idents.const_idents:
            bound_type = IdentKind.Constant
        elif name in idents.func_idents:
            bound_type = IdentKind.Function
        else:
            return Err(UnidentifiedVariable(self.ident))

        has_arguments = len(self.arguments) > 0

        if is_pred:
            # Only predicates may stand where a proposition is expected.
            if bound_type != IdentKind.Predicate:
                return Err(ObjectUsedInPropContext(self.ident, bound_type))
        elif bound_type == IdentKind.Predicate:
            return Err(PropUsedInObjectContext(self.ident))
        elif bound_type == IdentKind.Function:
            # Functions must be applied to at least one argument.
            if not has_arguments:
                return Err(MissingArguments(self.ident))
        elif has_arguments:
            # Variables and constants can never be called.
            return Err(CallingNonFunc(self.ident, bound_type))

        if bound_type == IdentKind.Variable:
            return Ok(IRVar(name))

        # Lower every argument as an object, gather the results into a tuple,
        # then build the IRProp.  `<=` on a Result is used as a map-like operator
        # here (defined outside this file).
        lowered_args = sequence([arg.make_ir(idents, False) for arg in self.arguments])
        args_as_tuple = lowered_args <= cast(Callable[[Iterable[IRTerm]], Tuple[IRTerm, ...]], tuple)
        return args_as_tuple <= p(IRProp, name)
|
|
|
|
@cur2
def make_ir(
    idents: IdentBindings,
    clauses: Sequence[Sequence[ASTTerm]],
) -> Result[KnowledgeBase, GenIrError]:
    """Lower parsed clauses to a knowledge base.

    Every term of every clause is lowered as a proposition against `idents`.
    On success the result holds an `FSet` of clauses, each itself an `FSet` of
    IR terms.  The per-term results are combined with `sequence` (defined
    outside this file; presumably it yields the first error encountered).
    """
    def to_knowledge_base(ir_clauses):
        # Each clause becomes a set of terms; the knowledge base a set of clauses.
        return FSet(FSet(ir_clause) for ir_clause in ir_clauses)

    lowered_clauses = sequence([
        sequence([term.make_ir(idents, True) for term in clause])
        for clause in clauses
    ])
    return map_res(to_knowledge_base, lowered_clauses)
|
|
|
|
def cons(stack: Sequence[Any]) -> Sequence[Any]:
|
|
match stack:
|
|
case [rest, head, *popped_stack]:
|
|
return ((head, *rest), *popped_stack)
|
|
case bad_stack:
|
|
raise Exception("Unexpected stack state!", bad_stack)
|
|
|
|
# The empty collection; the grammar's empty productions push it with `introduce(nil)`
# so that `cons` has something to build on.
nil: Sequence[Any] = tuple()
|
|
@cur2
def introduce(
    cons: Any,
    stack: Sequence[Any]
) -> Sequence[Any]:
    """Push a fixed value onto the top of the stack.

    `cons` is the value to push (the parameter name shadows the module-level
    `cons` function inside this body only).  Returns a new tuple; `stack` is
    left untouched.
    """
    return (cons,) + tuple(stack)
|
|
|
|
def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
|
|
match stack:
|
|
case [arg, func, *popped_stack] if hasattr(func, '__call__'):
|
|
return (func(arg), *popped_stack)
|
|
raise Exception("Unexpected stack state!", stack)
|
|
@cur2
|
|
def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
|
|
match stack:
|
|
case [arg, *popped_stack]:
|
|
return (func(arg), *popped_stack)
|
|
case bad_stack:
|
|
raise Exception("Unexpected stack state!", bad_stack)
|
|
|
|
def drop(stack: Sequence[Any]) -> Sequence[Any]:
    """Discard the top (first) entry of the stack.

    An empty stack is returned unchanged in content (slicing never raises).
    """
    remainder = stack[1:]
    return remainder
|
|
|
|
# The productions of the CNF grammar.  Each entry pairs a non-terminal with ONE of its
# right-hand sides; a non-terminal with several alternatives appears several times.
# A right-hand side mixes terminals (`Tok`), non-terminals (`Variable`) and actions
# (the stack -> stack helpers defined above).
# NOTE(review): presumably the parser (module `parse`, not shown here) pushes each matched
# lexeme onto the front of the stack and applies each action to the stack at the point
# where it appears -- confirm against `parse.parser`.
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
    # Four identifier sections followed by the clauses section.  Each section keyword and
    # each Newline/Eof is dropped straight away.  The identifier lists are fed one at a
    # time into `p(p,p,p,IdentBindings)` (presumably nested partial application), and the
    # finished bindings are handed to `make_ir`, which is then applied to the clauses.
    (Variable.Start,
        [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,IdentBindings)), Tok.Newline, drop
        , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
        , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
        , Tok.FunctionsSection, drop, Variable.Idents, f_apply, call_func(make_ir), Tok.Newline, drop
        , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),

    # A run of identifiers, collected as a tuple of their matched strings.
    (Variable.Idents,
        [ Tok.Identifier, call_func(lambda i: i.matched_string), Variable.Idents, cons ]),
    (Variable.Idents,
        [ introduce(nil) ]),

    # Clauses are newline-separated; Clauses_ allows a trailing newline with no clause.
    (Variable.Clauses,
        [ Tok.Newline, drop, Variable.Clauses_ ]),
    (Variable.Clauses,
        [ introduce(nil) ]),

    (Variable.Clauses_,
        [ Variable.Clause, Variable.Clauses, cons ]),
    (Variable.Clauses_,
        [ introduce(nil) ]),

    # A clause is one or more terms, collected as a tuple.
    (Variable.Clause,
        [ Variable.Term, Variable.Clause_, cons ]),

    # No `cons` here: Clause already leaves a complete collection on the stack.
    (Variable.Clause_,
        [ Variable.Clause ]),
    (Variable.Clause_,
        [ introduce(nil) ]),

    # The curried constructor first takes the lexeme, then (via f_apply) the sub-term
    # or argument list produced by the following non-terminal.
    (Variable.Term,
        [ Tok.Negate, call_func(cur2(ASTNegated)), Variable.Term, f_apply ]),
    (Variable.Term,
        [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),

    # Optional parenthesised argument list; when present it holds at least one term.
    (Variable.Func,
        [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
    (Variable.Func,
        [ introduce(nil) ]),

    # The remaining comma-separated arguments.
    (Variable.CSTerms,
        [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
    (Variable.CSTerms,
        [ introduce(nil) ]),
]
|
|
"""
Implements the following grammar:

Start    := PredicateSection <Idents> Newline
            VariablesSection <Idents> Newline
            ConstantsSection <Idents> Newline
            FunctionsSection <Idents> Newline
            ClausesSection <Clauses> Eof

Idents   := Identifier <Idents>
         := ε

Clauses  := Newline <Clauses'>
         := ε

Clauses' := <Clause> <Clauses>
         := ε

Clause   := <Term> <Clause'>

Clause'  := <Clause>
         := ε

Term     := Negate <Term>
         := Identifier <Func?>

Func?    := OpenP <Term> <CSTerms> CloseP
         := ε

CSTerms  := Comma <Term> <CSTerms>
         := ε
"""
|
|
|
|
def lex_and_parse(input: str) -> Result[Result[KnowledgeBase, GenIrError], Tuple[Lexeme[Tok], Collection[Tok]]]:
|
|
lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, input))
|
|
|
|
oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
|
|
parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
|
|
match parser_(lexemes):
|
|
case Ok([Ok(ir)]):
|
|
return Ok(Ok(ir))
|
|
case Ok([Err(err)]):
|
|
return Ok(Err(err))
|
|
case Ok(huh):
|
|
raise Exception('Unexpected end result: ', huh)
|
|
case Err(e_tup):
|
|
return Err(e_tup) #type:ignore
|
|
raise Exception('Unreachable')
|
|
|
|
if __name__ == '__main__':
|
|
# from emis_funky_funktions import cur2, flip
|
|
# from build_oracle import print_oracle_table_enum, oracle_table
|
|
# print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
|
|
import sys
|
|
|
|
if len(sys.argv) == 2:
|
|
with open(sys.argv[1]) as file:
|
|
match lex_and_parse(file.read()):
|
|
case Ok(Ok(ir)):
|
|
print('\n'.join(' or '.join(str(t) for t in c) for c in ir))
|
|
case Ok(Err(err)):
|
|
print(err)
|
|
exit(102)
|
|
case Err((Lexeme(token, text, line, col_start, col_end), expected)):
|
|
print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
|
|
exit(101)
|
|
else:
|
|
print(f'Usage: python {sys.argv[0]} <cnf-file-to-parse>')
|
|
exit(100) |