diff --git a/grammar.py b/grammar.py
index fddaa86..892dfc5 100644
--- a/grammar.py
+++ b/grammar.py
@@ -6,10 +6,16 @@ oracle table for the grammar it defines. It's recommended that this be done usi
 `build_oracle.sh` instead, however, which will build a whole python module containing
 the oracle table, complete with imports.
 """
+from emis_funky_funktions import *
+
+from dataclasses import dataclass
 from enum import auto, IntEnum
 from re import compile, Pattern
-from typing import Collection, Mapping, Sequence, Tuple
+from lex import Lexeme
+from parse import Action
+
+from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
 
 
 class Tok(IntEnum):
     """
@@ -67,51 +73,119 @@ class Variable(IntEnum):
     def __repr__(self) -> str:
         return f'<{self._name_}>'
 
-GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
+ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
+
+@dataclass(frozen=True)
+class ASTNegated:
+    term: ASTTerm
+
+    def __str__(self) -> str:
+        return f'¬{self.term}'
+
+@dataclass(frozen=True)
+class ASTProp:
+    ident: Lexeme[Tok]
+    arguments: Sequence[ASTTerm]
+
+    def __str__(self) -> str:
+        if len(self.arguments):
+            return f'{self.ident.matched_string}({",".join(map(str, self.arguments))})'
+        else:
+            return self.ident.matched_string
+
+@dataclass(frozen=True)
+class AST:
+    predicate_idents: Sequence[Lexeme[Tok]]
+    variable_idents: Sequence[Lexeme[Tok]]
+    const_idents: Sequence[Lexeme[Tok]]
+    func_idents: Sequence[Lexeme[Tok]]
+    clauses: Sequence[Sequence[ASTTerm]]
+
+    def __str__(self) -> str:
+        return (
+            'Predicates: ' + repr([i.matched_string for i in self.predicate_idents]) + '\n' +
+            'Variables: ' + repr([i.matched_string for i in self.variable_idents]) + '\n' +
+            'Constants: ' + repr([i.matched_string for i in self.const_idents]) + '\n' +
+            'Functions: ' + repr([i.matched_string for i in self.func_idents]) + '\n' +
+            'Clauses:\n' + '\n'.join(' or '.join(str(term) for term in clause) for clause in self.clauses) + '\n'
+        )
+
+def cons(stack: Sequence[Any]) -> Sequence[Any]:
+    match stack:
+        case [rest, head, *popped_stack]:
+            return ((head, *rest), *popped_stack)
+        case bad_stack:
+            raise Exception("Unexpected stack state!", bad_stack)
+
+nil: Sequence[Any] = tuple()
+@cur2
+def introduce(
+    cons: Any,
+    stack: Sequence[Any]
+) -> Sequence[Any]:
+    return (cons, *stack)
+
+def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
+    match stack:
+        case [arg, func, *popped_stack] if hasattr(func, '__call__'):
+            return (func(arg), *popped_stack)
+    raise Exception("Unexpected stack state!", stack)
+@cur2
+def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
+    match stack:
+        case [arg, *popped_stack]:
+            return (func(arg), *popped_stack)
+        case bad_stack:
+            raise Exception("Unexpected stack state!", bad_stack)
+
+def drop(stack: Sequence[Any]) -> Sequence[Any]:
+    return stack[1:]
+
+GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
     (Variable.Start,
-        [ Tok.PredicateSection, Variable.Idents, Tok.Newline
-        , Tok.VariablesSection, Variable.Idents, Tok.Newline
-        , Tok.ConstantsSection, Variable.Idents, Tok.Newline
-        , Tok.FunctionsSection, Variable.Idents, Tok.Newline
-        , Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ),
+        [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,p,AST)), Tok.Newline, drop
+        , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+        , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+        , Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+        , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),
     (Variable.Idents,
-        [ Tok.Identifier, Variable.Idents ]),
+        [ Tok.Identifier, Variable.Idents, cons ]),
     (Variable.Idents,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Clauses,
-        [ Tok.Newline, Variable.Clauses_ ]),
+        [ Tok.Newline, drop, Variable.Clauses_ ]),
     (Variable.Clauses,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Clauses_,
-        [ Variable.Clause, Variable.Clauses ]),
+        [ Variable.Clause, Variable.Clauses, cons ]),
     (Variable.Clauses_,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Clause,
-        [ Variable.Term, Variable.Clause_ ]),
+        [ Variable.Term, Variable.Clause_, cons ]),
     (Variable.Clause_,
         [ Variable.Clause ]),
     (Variable.Clause_,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Term,
-        [ Tok.Negate, Variable.Term ]),
+        [ Tok.Negate, drop, Variable.Term, call_func(ASTNegated) ]),
     (Variable.Term,
-        [ Tok.Identifier, Variable.Func ]),
+        [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),
     (Variable.Func,
-        [ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]),
+        [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
     (Variable.Func,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.CSTerms,
-        [ Tok.Comma, Variable.Term, Variable.CSTerms ]),
+        [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
     (Variable.CSTerms,
-        [ ]),
+        [ introduce(nil) ]),
 ]
 """
 Implements the following grammar:
@@ -147,6 +221,24 @@ CSTerms := Comma <Term> <CSTerms>
 """
 
 if __name__ == '__main__':
-    from emis_funky_funktions import cur2, flip
-    from build_oracle import print_oracle_table_enum, oracle_table
-    print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
\ No newline at end of file
+    # from emis_funky_funktions import cur2, flip
+    # from build_oracle import print_oracle_table_enum, oracle_table
+    # print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
+    from build_oracle import oracle_table
+    from parse import parser
+    from lex import tokenize
+
+    with open('sample.cnf') as file:
+        lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
+
+    oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
+    parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
+    maybe_ast = parser_(lexemes)
+
+    match maybe_ast:
+        case Ok([ast]):
+            print(ast)
+        case Ok(huh):
+            print('Unexpected end result: ', huh)
+        case Err((Lexeme(token, text, line, col_start, col_end), expected)):
+            print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
\ No newline at end of file
diff --git a/lex.py b/lex.py
index 4051c5a..11a1017 100644
--- a/lex.py
+++ b/lex.py
@@ -35,12 +35,13 @@ def try_lex1(regex: Pattern[str], tok: A, input: str, line_no: int, col_no: int)
             return None
         case match:
             assert match is not None
-            return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end()), input[match.end():]))
+            return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end() - 1), input[match.end():]))
 
 
 def tokenize(
     lex_table: Collection[Tuple[Pattern[str], A]],
     drop_tokens: Collection[A],
+    eof_token: A,
     input: str
 ) -> Result[List[Lexeme[A]], str]:
     """
@@ -86,7 +87,7 @@ def tokenize(
             )
             return inner(rest_input, line_no+newline_count, new_col_no, prefix)
         else:
-            return Ok(prefix)
+            return Ok(prefix + [Lexeme(eof_token, '', line_no, col_no, col_no)])
 
     return inner(input, 1, 1, [])
 
diff --git a/parse.py b/parse.py
index 45d9fcf..c2f273a 100644
--- a/parse.py
+++ b/parse.py
@@ -103,7 +103,10 @@ def parser(
         match stack:
             # A [Variable]
             case [top_of_stack, *popped_stack] if is_var(top_of_stack):
-                expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
+                try:
+                    expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
+                except IndexError:
+                    raise Exception('Unexpected end of input. Expected:', _expected(oracle[top_of_stack]))
                 match expansions:
                     case []:
                         return Err((lexemes[0], _expected(oracle[top_of_stack])))
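
For reference, a minimal sketch of the semantic-action protocol these changes assume: parse.py pushes each matched lexeme onto a value stack, and the Action callables interleaved with the grammar symbols rewrite that stack. The sketch mirrors the definitions added to grammar.py, but inlines the currying by hand instead of importing cur2 from emis_funky_funktions, and uses plain strings where the real parser pushes Lexeme objects.

from typing import Any, Callable, Sequence

def cons(stack: Sequence[Any]) -> Sequence[Any]:
    # Pop the already-built tail, then the element beneath it; push (head, *tail)
    rest, head, *popped_stack = stack
    return ((head, *rest), *popped_stack)

def introduce(value: Any) -> Callable[[Sequence[Any]], Sequence[Any]]:
    # Curried by hand: an action that pushes a constant onto the stack
    return lambda stack: (value, *stack)

def drop(stack: Sequence[Any]) -> Sequence[Any]:
    # Discard the lexeme a terminal just pushed (keywords, punctuation)
    return stack[1:]

nil: Sequence[Any] = tuple()

# Idents := Identifier <Idents> cons  |  introduce(nil)
# Matching the identifiers "P Q" unfolds into this action sequence:
stack: Sequence[Any] = tuple()
stack = ('P', *stack)          # Tok.Identifier matched; its lexeme is pushed
stack = ('Q', *stack)          # second Tok.Identifier
stack = introduce(nil)(stack)  # empty production closes the list: ((), 'Q', 'P')
stack = cons(stack)            # (('Q',), 'P')
stack = cons(stack)            # (('P', 'Q'),)
assert stack == (('P', 'Q'),)

This stack discipline is why every recursive production in GRAMMAR ends in cons and every empty production is introduce(nil): the empty case seeds an empty tuple at the deepest point of the recursion, and each cons prepends one element as the parser unwinds, leaving a single fully-built sequence on the stack.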