244 lines
7 KiB
Python
244 lines
7 KiB
Python
"""
|
|
A grammar for parsing CNF files
|
|
|
|
If this module is run directly in python, it will spit out valid python which produces an
|
|
oracle table for the grammar it defines. It's recommended that this be done using
|
|
`build_oracle.sh` instead, however, which will build a whole python module containing the
|
|
oracle table, complete with imports.
|
|
"""
|
|
from emis_funky_funktions import *
|
|
|
|
from dataclasses import dataclass
|
|
from enum import auto, IntEnum
|
|
from re import compile, Pattern
|
|
|
|
from lex import Lexeme
|
|
from parse import Action
|
|
|
|
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
|
|
|
|
class Tok(IntEnum):
|
|
"""
|
|
All possible tokens used in the grammar
|
|
"""
|
|
Newline = auto()
|
|
Whitespace = auto()
|
|
PredicateSection = auto()
|
|
VariablesSection = auto()
|
|
ConstantsSection = auto()
|
|
FunctionsSection = auto()
|
|
ClausesSection = auto()
|
|
Negate = auto()
|
|
OpenP = auto()
|
|
CloseP = auto()
|
|
Comma = auto()
|
|
Identifier = auto()
|
|
Eof = auto()
|
|
|
|
def __repr__(self):
|
|
return self._name_
|
|
|
|
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
|
|
(compile(r"\n"), Tok.Newline),
|
|
(compile(r"[ \t]+"), Tok.Whitespace),
|
|
(compile("Predicates:"), Tok.PredicateSection),
|
|
(compile("Variables:"), Tok.VariablesSection),
|
|
(compile("Constants:"), Tok.ConstantsSection),
|
|
(compile("Functions:"), Tok.FunctionsSection),
|
|
(compile("Clauses:"), Tok.ClausesSection),
|
|
(compile("!"), Tok.Negate),
|
|
(compile(r"\("), Tok.OpenP),
|
|
(compile(r"\)"), Tok.CloseP),
|
|
(compile(","), Tok.Comma),
|
|
(compile(r"\w+"), Tok.Identifier),
|
|
]
|
|
"""
|
|
A mapping of regexs to the tokens the identify
|
|
|
|
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
|
|
on the list also matches. All unicode strings should be matched by at least one token.
|
|
"""
|
|
|
|
class Variable(IntEnum):
|
|
Start = auto()
|
|
Idents = auto()
|
|
Clauses = auto()
|
|
Clauses_ = auto()
|
|
Clause = auto()
|
|
Clause_ = auto()
|
|
Term = auto()
|
|
Func = auto()
|
|
CSTerms = auto()
|
|
|
|
def __repr__(self) -> str:
|
|
return f'<{self._name_}>'
|
|
|
|
ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
|
|
|
|
@dataclass(frozen=True)
|
|
class ASTNegated:
|
|
term: ASTTerm
|
|
|
|
def __str__(self) -> str:
|
|
return f'¬{self.term}'
|
|
|
|
@dataclass(frozen=True)
|
|
class ASTProp:
|
|
ident: Lexeme[Tok]
|
|
arguments: Sequence[ASTTerm]
|
|
|
|
def __str__(self) -> str:
|
|
if len(self.arguments):
|
|
return f'{self.ident.matched_string}({",".join(map(str, self.arguments))})'
|
|
else:
|
|
return self.ident.matched_string
|
|
|
|
@dataclass(frozen=True)
|
|
class AST:
|
|
predicate_idents: Sequence[Lexeme[Tok]]
|
|
variable_idents: Sequence[Lexeme[Tok]]
|
|
const_idents: Sequence[Lexeme[Tok]]
|
|
func_idents: Sequence[Lexeme[Tok]]
|
|
clauses: Sequence[Sequence[ASTTerm]]
|
|
|
|
def __str__(self) -> str:
|
|
return (
|
|
'Predicates: ' + repr([i.matched_string for i in self.predicate_idents]) + '\n' +
|
|
'Variables: ' + repr([i.matched_string for i in self.variable_idents]) + '\n' +
|
|
'Constants: ' + repr([i.matched_string for i in self.const_idents]) + '\n' +
|
|
'Functions: ' + repr([i.matched_string for i in self.func_idents]) + '\n' +
|
|
'Clauses:\n' + '\n'.join(' or '.join(str(term) for term in clause) for clause in self.clauses) + '\n'
|
|
)
|
|
|
|
def cons(stack: Sequence[Any]) -> Sequence[Any]:
|
|
match stack:
|
|
case [rest, head, *popped_stack]:
|
|
return ((head, *rest), *popped_stack)
|
|
case bad_stack:
|
|
raise Exception("Unexpected stack state!", bad_stack)
|
|
|
|
nil: Sequence[Any] = tuple()
|
|
@cur2
|
|
def introduce(
|
|
cons: Any,
|
|
stack: Sequence[Any]
|
|
) -> Sequence[Any]:
|
|
return (cons, *stack)
|
|
|
|
def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
|
|
match stack:
|
|
case [arg, func, *popped_stack] if hasattr(func, '__call__'):
|
|
return (func(arg), *popped_stack)
|
|
raise Exception("Unexpected stack state!", stack)
|
|
@cur2
|
|
def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
|
|
match stack:
|
|
case [arg, *popped_stack]:
|
|
return (func(arg), *popped_stack)
|
|
case bad_stack:
|
|
raise Exception("Unexpected stack state!", bad_stack)
|
|
|
|
def drop(stack: Sequence[Any]) -> Sequence[Any]:
|
|
return stack[1:]
|
|
|
|
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
|
|
(Variable.Start,
|
|
[ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,p,AST)), Tok.Newline, drop
|
|
, Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
|
|
, Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
|
|
, Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
|
|
, Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),
|
|
|
|
(Variable.Idents,
|
|
[ Tok.Identifier, Variable.Idents, cons ]),
|
|
(Variable.Idents,
|
|
[ introduce(nil) ]),
|
|
|
|
(Variable.Clauses,
|
|
[ Tok.Newline, drop, Variable.Clauses_ ]),
|
|
(Variable.Clauses,
|
|
[ introduce(nil) ]),
|
|
|
|
(Variable.Clauses_,
|
|
[ Variable.Clause, Variable.Clauses, cons ]),
|
|
(Variable.Clauses_,
|
|
[ introduce(nil) ]),
|
|
|
|
(Variable.Clause,
|
|
[ Variable.Term, Variable.Clause_, cons ]),
|
|
|
|
(Variable.Clause_,
|
|
[ Variable.Clause ]),
|
|
(Variable.Clause_,
|
|
[ introduce(nil) ]),
|
|
|
|
(Variable.Term,
|
|
[ Tok.Negate, drop, Variable.Term, call_func(ASTNegated) ]),
|
|
(Variable.Term,
|
|
[ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),
|
|
|
|
(Variable.Func,
|
|
[ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
|
|
(Variable.Func,
|
|
[ introduce(nil) ]),
|
|
|
|
(Variable.CSTerms,
|
|
[ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
|
|
(Variable.CSTerms,
|
|
[ introduce(nil) ]),
|
|
]
|
|
"""
|
|
Implements the following grammar:
|
|
|
|
Start := PredicateSection <Idents> Newline
|
|
VariablesSection <Idents> Newline
|
|
ConstantsSection <Idents> Newline
|
|
FunctionsSection <Idents> Newline
|
|
ClausesSection <Clauses> Eof
|
|
|
|
Idents := Identifier <Idents>
|
|
:= ε
|
|
|
|
Clauses := Newline <Clauses'>
|
|
:= ε
|
|
|
|
Clauses' := <Clause> <Clauses>
|
|
:= ε
|
|
|
|
Clause := <Term> <Clause'>
|
|
|
|
Clause' := <Clause>
|
|
:= ε
|
|
|
|
Term := Negate <Term>
|
|
:= Identifier <Func?>
|
|
|
|
Func? := OpenP <Term> <CSTerms> CloseP
|
|
:= ε
|
|
|
|
CSTerms := Comma <Term> <CSTerms>
|
|
:= ε
|
|
"""
|
|
|
|
if __name__ == '__main__':
|
|
# from emis_funky_funktions import cur2, flip
|
|
# from build_oracle import print_oracle_table_enum, oracle_table
|
|
# print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
|
|
from build_oracle import oracle_table
|
|
from parse import parser
|
|
from lex import tokenize
|
|
|
|
with open('sample.cnf') as file:
|
|
lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
|
|
|
|
oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
|
|
parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
|
|
maybe_ast = parser_(lexemes)
|
|
|
|
match maybe_ast:
|
|
case Ok([ast]):
|
|
print(ast)
|
|
case Ok(huh):
|
|
print('Unexpected end result: ', huh)
|
|
case Err((Lexeme(token, text, line, col_start, col_end), expected)):
|
|
print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}') |