""" A grammar for parsing CNF files If this module is run directly in python, it will spit out valid python which produces an oracle table for the grammar it defines. It's recommended that this be done using `build_oracle.sh` instead, however, which will build a whole python module containing the oracle table, complete with imports. """ from emis_funky_funktions import * from dataclasses import dataclass from enum import auto, IntEnum from re import compile, Pattern from lex import Lexeme from parse import Action from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias class Tok(IntEnum): """ All possible tokens used in the grammar """ Newline = auto() Whitespace = auto() PredicateSection = auto() VariablesSection = auto() ConstantsSection = auto() FunctionsSection = auto() ClausesSection = auto() Negate = auto() OpenP = auto() CloseP = auto() Comma = auto() Identifier = auto() Eof = auto() def __repr__(self): return self._name_ LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [ (compile(r"\n"), Tok.Newline), (compile(r"[ \t]+"), Tok.Whitespace), (compile("Predicates:"), Tok.PredicateSection), (compile("Variables:"), Tok.VariablesSection), (compile("Constants:"), Tok.ConstantsSection), (compile("Functions:"), Tok.FunctionsSection), (compile("Clauses:"), Tok.ClausesSection), (compile("!"), Tok.Negate), (compile(r"\("), Tok.OpenP), (compile(r"\)"), Tok.CloseP), (compile(","), Tok.Comma), (compile(r"\w+"), Tok.Identifier), ] """ A mapping of regexs to the tokens the identify Tokens earlier on in the list should be regarded as higher priority, even if a match lower on the list also matches. All unicode strings should be matched by at least one token. """ class Variable(IntEnum): Start = auto() Idents = auto() Clauses = auto() Clauses_ = auto() Clause = auto() Clause_ = auto() Term = auto() Func = auto() CSTerms = auto() def __repr__(self) -> str: return f'<{self._name_}>' ASTTerm: TypeAlias = 'ASTNegated | ASTProp' class IdentKind(IntEnum): Function = auto() Constant = auto() Variable = auto() Predicate = auto() @dataclass(frozen=True) class CallingNonFunc: term: Lexeme[Tok] obj_type: IdentKind def __str__(self): return f'Semantic error: Attempted to call {repr(self.term.matched_string)} (a {self.obj_type.name.lower()}) with arguments on line {self.term.line}:{self.term.col_start}-{self.term.col_end}' @dataclass(frozen=True) class MissingArguments: term: Lexeme[Tok] def __str__(self): return f'Semantic error: The function {repr(self.term.matched_string)} on line {self.term.line}:{self.term.col_start}-{self.term.col_end} is missing arguments!' @dataclass(frozen=True) class UnidentifiedVariable: term: Lexeme[Tok] def __str__(self): return f'Semantic error: Unidentified identifier {repr(self.term.matched_string)} on line {self.term.line}:{self.term.col_start}-{self.term.col_end}' @dataclass(frozen=True) class PropUsedInObjectContext: term: Lexeme[Tok] def __str__(self): return f'Semantic error: The proposition {repr(self.term.matched_string)} was used in a context where an object was expected on line {self.term.line}:{self.term.col_start}-{self.term.col_end}' @dataclass(frozen=True) class ObjectUsedInPropContext: term: Lexeme[Tok] obj_type: IdentKind def __str__(self): return f'Semantic error: The {self.obj_type.name.lower()} {repr(self.term.matched_string)} was used in a context where a proposition was expected on line {self.term.line}:{self.term.col_start}-{self.term.col_end}' @dataclass(frozen=True) class NegationOfObject: line: int col: int def __str__(self): return f'Semantic error: Attempted to use negation in a context where working on objects on line {self.line}:{self.col}' GenIrError: TypeAlias = CallingNonFunc | MissingArguments | UnidentifiedVariable | PropUsedInObjectContext | ObjectUsedInPropContext | NegationOfObject @dataclass(frozen=True) class IdentBindings: predicate_idents: Sequence[str] variable_idents: Sequence[str] const_idents: Sequence[str] func_idents: Sequence[str] @dataclass(frozen=True) class ASTNegated: neg_lexeme: Lexeme[Tok] term: ASTTerm def make_ir(self, idents: IdentBindings, is_prop: bool) -> 'Result[IRTerm, GenIrError]': if is_prop: return map_res(IRNeg, self.term.make_ir(idents, True)) else: return Err(NegationOfObject(self.neg_lexeme.line, self.neg_lexeme.col_start)) @dataclass(frozen=True) class ASTProp: ident: Lexeme[Tok] arguments: Sequence[ASTTerm] def make_ir(self, idents: IdentBindings, is_pred: bool) -> 'Result[IRTerm, GenIrError]': bound_type = ( IdentKind.Predicate if self.ident.matched_string in idents.predicate_idents else IdentKind.Variable if self.ident.matched_string in idents.variable_idents else IdentKind.Constant if self.ident.matched_string in idents.const_idents else IdentKind.Function if self.ident.matched_string in idents.func_idents else None ) if bound_type is None: return Err(UnidentifiedVariable(self.ident)) if is_pred: if bound_type != IdentKind.Predicate: return Err(ObjectUsedInPropContext(self.ident, bound_type)) else: if bound_type == IdentKind.Function: if not len(self.arguments): return Err(MissingArguments(self.ident)) elif bound_type == IdentKind.Predicate: return Err(PropUsedInObjectContext(self.ident)) else: if len(self.arguments): return Err(CallingNonFunc(self.ident, bound_type)) if bound_type == IdentKind.Variable: return Ok(IRVar(self.ident)) else: arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments]) return map_res(p(IRProp, self.ident), arg_ir) @dataclass(frozen=True) class IRProp: lexeme: Lexeme[Tok] arguments: 'Sequence[IRTerm]' def __str__(self) -> str: return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})' @dataclass(frozen=True) class IRVar: lexeme: Lexeme[Tok] def __str__(self) -> str: return f'*{self.lexeme.matched_string}' @dataclass(frozen=True) class IRNeg: inner: 'IRTerm' def __str__(self) -> str: return f'¬{self.inner}' IRTerm: TypeAlias = IRVar | IRProp | IRNeg @cur2 def make_ir( idents: IdentBindings, clauses: Sequence[Sequence[ASTTerm]], ) -> Result[Sequence[Sequence[IRTerm]], GenIrError]: return sequence([sequence([term.make_ir(idents, True) for term in clause]) for clause in clauses]) def cons(stack: Sequence[Any]) -> Sequence[Any]: match stack: case [rest, head, *popped_stack]: return ((head, *rest), *popped_stack) case bad_stack: raise Exception("Unexpected stack state!", bad_stack) nil: Sequence[Any] = tuple() @cur2 def introduce( cons: Any, stack: Sequence[Any] ) -> Sequence[Any]: return (cons, *stack) def f_apply(stack: Sequence[Any]) -> Sequence[Any]: match stack: case [arg, func, *popped_stack] if hasattr(func, '__call__'): return (func(arg), *popped_stack) raise Exception("Unexpected stack state!", stack) @cur2 def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]: match stack: case [arg, *popped_stack]: return (func(arg), *popped_stack) case bad_stack: raise Exception("Unexpected stack state!", bad_stack) def drop(stack: Sequence[Any]) -> Sequence[Any]: return stack[1:] GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [ (Variable.Start, [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,IdentBindings)), Tok.Newline, drop , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop , Tok.FunctionsSection, drop, Variable.Idents, f_apply, call_func(make_ir), Tok.Newline, drop , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ), (Variable.Idents, [ Tok.Identifier, call_func(lambda i: i.matched_string), Variable.Idents, cons ]), (Variable.Idents, [ introduce(nil) ]), (Variable.Clauses, [ Tok.Newline, drop, Variable.Clauses_ ]), (Variable.Clauses, [ introduce(nil) ]), (Variable.Clauses_, [ Variable.Clause, Variable.Clauses, cons ]), (Variable.Clauses_, [ introduce(nil) ]), (Variable.Clause, [ Variable.Term, Variable.Clause_, cons ]), (Variable.Clause_, [ Variable.Clause ]), (Variable.Clause_, [ introduce(nil) ]), (Variable.Term, [ Tok.Negate, call_func(cur2(ASTNegated)), Variable.Term, f_apply ]), (Variable.Term, [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]), (Variable.Func, [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]), (Variable.Func, [ introduce(nil) ]), (Variable.CSTerms, [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]), (Variable.CSTerms, [ introduce(nil) ]), ] """ Implements the following grammar: Start := PredicateSection Newline VariablesSection Newline ConstantsSection Newline FunctionsSection Newline ClausesSection Eof Idents := Identifier := ε Clauses := Newline := ε Clauses' := := ε Clause := Clause' := := ε Term := Negate := Identifier Func? := OpenP CloseP := ε CSTerms := Comma := ε """ def lex_and_parse(input: str) -> Result[Result[Sequence[Sequence[IRTerm]], GenIrError], Tuple[Lexeme[Tok], Collection[Tok]]]: lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, input)) oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start) match parser_(lexemes): case Ok([Ok(ir)]): return Ok(Ok(ir)) case Ok([Err(err)]): return Ok(Err(err)) case Ok(huh): raise Exception('Unexpected end result: ', huh) case Err(e_tup): return Err(e_tup) #type:ignore raise Exception('Unreachable') if __name__ == '__main__': # from emis_funky_funktions import cur2, flip # from build_oracle import print_oracle_table_enum, oracle_table # print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore from build_oracle import oracle_table from parse import parser from lex import tokenize import sys if len(sys.argv) == 2: with open(sys.argv[1]) as file: match lex_and_parse(file.read()): case Ok(Ok(ir)): print('\n'.join(' or '.join(str(t) for t in c) for c in ir)) case Ok(Err(err)): print(err) exit(102) case Err((Lexeme(token, text, line, col_start, col_end), expected)): print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}') exit(101) else: print(f'Usage: python {sys.argv[0]} ') exit(100)