278 lines
8.4 KiB
Python
278 lines
8.4 KiB
Python
"""
|
|
A grammar for parsing CNF files
|
|
|
|
If this module is run directly in python, it will spit out valid python which produces an
|
|
oracle table for the grammar it defines. It's recommended that this be done using
|
|
`build_oracle.sh` instead, however, which will build a whole python module containing the
|
|
oracle table, complete with imports.
|
|
"""
|
|
from emis_funky_funktions import *
|
|
|
|
from dataclasses import dataclass
|
|
from enum import auto, IntEnum
|
|
from re import compile, Pattern
|
|
|
|
from lex import Lexeme
|
|
from parse import Action
|
|
|
|
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
|
|
|
|
class Tok(IntEnum):
    """
    Every kind of token that can appear in the CNF grammar.

    Values are assigned by `auto()` in declaration order.
    """
    # Layout
    Newline = auto()
    Whitespace = auto()

    # Section headers
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()

    # Punctuation and operators
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()

    # Names
    Identifier = auto()

    # End-of-input marker
    Eof = auto()

    def __repr__(self) -> str:
        """Show only the member's name, e.g. `Newline`."""
        return self.name
|
|
|
|
# Each (regex source, token) pair is compiled once at import time.  The order
# of the pairs is significant: the fixed section headers come before the
# generic identifier pattern.
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(regex_source), token)
    for regex_source, token in (
        (r"\n", Tok.Newline),
        (r"[ \t]+", Tok.Whitespace),
        ("Predicates:", Tok.PredicateSection),
        ("Variables:", Tok.VariablesSection),
        ("Constants:", Tok.ConstantsSection),
        ("Functions:", Tok.FunctionsSection),
        ("Clauses:", Tok.ClausesSection),
        ("!", Tok.Negate),
        (r"\(", Tok.OpenP),
        (r"\)", Tok.CloseP),
        (",", Tok.Comma),
        (r"\w+", Tok.Identifier),
    )
]
|
|
"""
|
|
A mapping of regexes to the tokens they identify
|
|
|
|
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
|
|
on the list also matches. All unicode strings should be matched by at least one token.
|
|
"""
|
|
|
|
class Variable(IntEnum):
    """
    The non-terminals (grammar variables) used by `GRAMMAR`.

    Values are assigned by `auto()` in declaration order.
    """
    Start = auto()
    Idents = auto()
    Clauses = auto()
    Clauses_ = auto()
    Clause = auto()
    Clause_ = auto()
    Term = auto()
    Func = auto()
    CSTerms = auto()

    def __repr__(self) -> str:
        """Show the member's name in angle brackets, e.g. `<Start>`."""
        return f'<{self.name}>'
|
|
|
|
# Any syntax-tree term: a negation or an identifier with optional arguments.
# Written as a string because both classes are defined further down the module.
ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
|
|
|
|
@dataclass(frozen=True)
class ArgumentsForVariable:
    """
    Semantic error: an identifier declared as a variable was given an argument list.
    """
    # The identifier lexeme that was used with arguments
    term: Lexeme[Tok]
|
|
|
|
@dataclass(frozen=True)
class UnidentifiedVariable:
    """
    Semantic error: an identifier was used that appears in none of the declared name lists.
    """
    # The identifier lexeme that could not be resolved
    term: Lexeme[Tok]
|
|
|
|
# The semantic errors that lowering the syntax tree to IR can report.
GenIrError: TypeAlias = ArgumentsForVariable | UnidentifiedVariable
|
|
|
|
@dataclass(frozen=True)
class ASTNegated:
    """
    Syntax-tree node for a negated term.
    """
    # The term being negated
    term: ASTTerm

    def make_ir(self, props: Sequence[str], var: Sequence[str]) -> 'Result[IRTerm, GenIrError]':
        """
        Lower the wrapped term and pass the result through `map_res` with `IRNeg`.

        `props` and `var` are forwarded unchanged to the inner term's `make_ir`.
        Presumably `map_res` applies `IRNeg` only to a successful result and
        lets an error pass through -- confirm against emis_funky_funktions.
        """
        lowered_inner = self.term.make_ir(props, var)
        return map_res(IRNeg, lowered_inner)
|
|
|
|
@dataclass(frozen=True)
class ASTProp:
    """
    Syntax-tree node for an identifier together with its (possibly empty) argument list.
    """
    # The identifier lexeme naming this term
    ident: Lexeme[Tok]
    # The terms between the parentheses; empty when no argument list was written
    arguments: Sequence[ASTTerm]

    def make_ir(self, props: Sequence[str], vars: Sequence[str]) -> 'Result[IRTerm, GenIrError]':
        """
        Resolve this identifier against the declared names and build its IR node.

        - Name found in `props`: lower every argument and build an `IRProp`.
        - Name found in `vars`: build an `IRVar`, or report
          `ArgumentsForVariable` if any arguments were supplied.
        - Otherwise: report `UnidentifiedVariable`.

        `props` is checked before `vars`, so a name present in both is treated
        as a prop.
        """
        name = self.ident.matched_string

        if name in props:
            build_prop = p(IRProp, self.ident)
            # presumably `sequence` folds the per-argument results into a
            # single result -- confirm against emis_funky_funktions
            lowered_args = sequence([arg.make_ir(props, vars) for arg in self.arguments])
            return map_res(build_prop, lowered_args)

        if name not in vars:
            return Err(UnidentifiedVariable(self.ident))

        if len(self.arguments) > 0:
            return Err(ArgumentsForVariable(self.ident))
        return Ok(IRVar(self.ident))
|
|
|
|
@dataclass(frozen=True)
class IRProp:
    """
    IR node for a named term applied to zero or more argument terms.
    """
    # The identifier lexeme naming this term
    lexeme: Lexeme[Tok]
    # The already-lowered argument terms
    arguments: 'Sequence[IRTerm]'

    def __str__(self) -> str:
        """Render as `name(arg1,arg2,...)`; the parentheses appear even with no arguments."""
        rendered_args = ",".join(map(str, self.arguments))
        return f'{self.lexeme.matched_string}({rendered_args})'
|
|
|
|
@dataclass(frozen=True)
class IRVar:
    """
    IR node for a reference to a declared variable.
    """
    # The identifier lexeme naming the variable
    lexeme: Lexeme[Tok]

    def __str__(self) -> str:
        """Render as the variable's name prefixed with `*`."""
        return f'*{self.lexeme.matched_string}'
|
|
|
|
@dataclass(frozen=True)
class IRNeg:
    """
    IR node for the negation of another IR term.
    """
    # The term being negated
    inner: 'IRTerm'

    def __str__(self) -> str:
        """Render as `¬` followed by the rendering of the inner term."""
        return f'¬{self.inner}'
|
|
|
|
# Any node of the intermediate representation.
IRTerm: TypeAlias = IRVar | IRProp | IRNeg
|
|
|
|
def make_ir(
    predicate_idents: Sequence[Lexeme[Tok]],
    variable_idents: Sequence[Lexeme[Tok]],
    const_idents: Sequence[Lexeme[Tok]],
    func_idents: Sequence[Lexeme[Tok]],
    clauses: Sequence[Sequence[ASTTerm]],
) -> Result[Sequence[Sequence[IRTerm]], GenIrError]:
    """
    Lower every clause of a parsed file from syntax tree to IR.

    Constants, functions and predicates are pooled (in that order) into one
    list of names that may take arguments; variables form a second list.
    Each term of each clause is then lowered against those two lists.

    Presumably `sequence` collapses a list of results into one result, so the
    overall value is either all lowered clauses or an error -- confirm against
    emis_funky_funktions.
    """
    prop_lexemes = (*const_idents, *func_idents, *predicate_idents)
    prop_idents = [lexeme.matched_string for lexeme in prop_lexemes]
    var_idents = [lexeme.matched_string for lexeme in variable_idents]

    def lower_clause(clause: Sequence[ASTTerm]) -> Any:
        # Lower each term of one clause, then combine the per-term results.
        return sequence([term.make_ir(prop_idents, var_idents) for term in clause])

    return sequence([lower_clause(clause) for clause in clauses])
|
|
|
|
def cons(stack: Sequence[Any]) -> Sequence[Any]:
|
|
match stack:
|
|
case [rest, head, *popped_stack]:
|
|
return ((head, *rest), *popped_stack)
|
|
case bad_stack:
|
|
raise Exception("Unexpected stack state!", bad_stack)
|
|
|
|
# The empty sequence; used with `introduce` to start a list that `cons` extends.
nil: Sequence[Any] = ()
|
|
@cur2
def introduce(
    cons: Any,
    stack: Sequence[Any]
) -> Sequence[Any]:
    """
    Push the fixed value `cons` on top of `stack`, returning the new stack as a tuple.

    Decorated with `cur2`, so `introduce(value)` is presumably a one-argument
    stack action -- confirm against emis_funky_funktions.
    """
    return (cons,) + tuple(stack)
|
|
|
|
def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
|
|
match stack:
|
|
case [arg, func, *popped_stack] if hasattr(func, '__call__'):
|
|
return (func(arg), *popped_stack)
|
|
raise Exception("Unexpected stack state!", stack)
|
|
@cur2
|
|
def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
|
|
match stack:
|
|
case [arg, *popped_stack]:
|
|
return (func(arg), *popped_stack)
|
|
case bad_stack:
|
|
raise Exception("Unexpected stack state!", bad_stack)
|
|
|
|
def drop(stack: Sequence[Any]) -> Sequence[Any]:
    """
    Discard the top item of `stack` and return what remains.

    An empty stack is returned unchanged (slicing never raises).
    """
    remaining = stack[1:]
    return remaining
|
|
|
|
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
    # Start: the five sections in fixed order.  Every header and newline lexeme
    # is dropped.  The first ident list is handed to `p(p, p, p, p, make_ir)`;
    # each later section's result is applied (via `f_apply`) to whatever that
    # left on the stack.  Presumably `p` is partial application, so `make_ir`
    # runs once all five arguments are in -- confirm against emis_funky_funktions.
    (
        Variable.Start,
        [
            Tok.PredicateSection, drop, Variable.Idents, call_func(p(p, p, p, p, make_ir)), Tok.Newline, drop,
            Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop,
            Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop,
            Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop,
            Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop,
        ],
    ),

    # Idents: zero or more identifiers, collected into a sequence.
    (Variable.Idents, [Tok.Identifier, Variable.Idents, cons]),
    (Variable.Idents, [introduce(nil)]),

    # Clauses: newline-separated clauses; the newline itself is dropped.
    (Variable.Clauses, [Tok.Newline, drop, Variable.Clauses_]),
    (Variable.Clauses, [introduce(nil)]),

    # Clauses': one clause followed by more clauses, or nothing.
    (Variable.Clauses_, [Variable.Clause, Variable.Clauses, cons]),
    (Variable.Clauses_, [introduce(nil)]),

    # Clause: one or more terms, collected into a sequence.
    (Variable.Clause, [Variable.Term, Variable.Clause_, cons]),

    # Clause': the rest of a clause, or nothing.
    (Variable.Clause_, [Variable.Clause]),
    (Variable.Clause_, [introduce(nil)]),

    # Term: a negated term, or an identifier with an optional argument list.
    # `cur2(ASTProp)` receives the identifier first; `f_apply` later supplies
    # the argument sequence produced by <Func>.
    (Variable.Term, [Tok.Negate, drop, Variable.Term, call_func(ASTNegated)]),
    (Variable.Term, [Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply]),

    # Func: a parenthesised, non-empty, comma-separated term list, or nothing.
    (Variable.Func, [Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop]),
    (Variable.Func, [introduce(nil)]),

    # CSTerms: the `, term` tail of an argument list, or nothing.
    (Variable.CSTerms, [Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons]),
    (Variable.CSTerms, [introduce(nil)]),
]
|
|
"""
|
|
Implements the following grammar:
|
|
|
|
Start := PredicateSection <Idents> Newline
|
|
VariablesSection <Idents> Newline
|
|
ConstantsSection <Idents> Newline
|
|
FunctionsSection <Idents> Newline
|
|
ClausesSection <Clauses> Eof
|
|
|
|
Idents := Identifier <Idents>
|
|
:= ε
|
|
|
|
Clauses := Newline <Clauses'>
|
|
:= ε
|
|
|
|
Clauses' := <Clause> <Clauses>
|
|
:= ε
|
|
|
|
Clause := <Term> <Clause'>
|
|
|
|
Clause' := <Clause>
|
|
:= ε
|
|
|
|
Term := Negate <Term>
|
|
:= Identifier <Func?>
|
|
|
|
Func? := OpenP <Term> <CSTerms> CloseP
|
|
:= ε
|
|
|
|
CSTerms := Comma <Term> <CSTerms>
|
|
:= ε
|
|
"""
|
|
|
|
if __name__ == '__main__':
|
|
# from emis_funky_funktions import cur2, flip
|
|
# from build_oracle import print_oracle_table_enum, oracle_table
|
|
# print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
|
|
from build_oracle import oracle_table
|
|
from parse import parser
|
|
from lex import tokenize
|
|
|
|
with open('sample.cnf') as file:
|
|
lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
|
|
|
|
oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
|
|
parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
|
|
maybe_ast = parser_(lexemes)
|
|
|
|
match maybe_ast:
|
|
case Ok([Ok(ast)]):
|
|
print('\n'.join(' or '.join(str(t) for t in c) for c in ast))
|
|
case Ok([Err(ArgumentsForVariable(v))]):
|
|
print(f'Semantic error: Arguments listed for variable {repr(v.matched_string)} on line {v.line}:{v.col_start}-{v.col_end}')
|
|
case Ok([Err(UnidentifiedVariable(v))]):
|
|
print(f'Semantic error: Unidentified identifier {repr(v.matched_string)} on line {v.line}:{v.col_start}-{v.col_end}')
|
|
case Ok(huh):
|
|
print('Unexpected end result: ', huh)
|
|
case Err((Lexeme(token, text, line, col_start, col_end), expected)):
|
|
print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}') |