Get parsing for the sample document working

This commit is contained in:
Emi Simpson 2023-03-04 22:02:16 -05:00
parent b8b6ba708f
commit 532a5a14d0
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847
3 changed files with 123 additions and 27 deletions

View file

@@ -6,10 +6,16 @@ oracle table for the grammar it defines. It's recommended that this be done usi
`build_oracle.sh` instead, however, which will build a whole python module containing the `build_oracle.sh` instead, however, which will build a whole python module containing the
oracle table, complete with imports. oracle table, complete with imports.
""" """
from emis_funky_funktions import *
from dataclasses import dataclass
from enum import auto, IntEnum from enum import auto, IntEnum
from re import compile, Pattern from re import compile, Pattern
from typing import Collection, Mapping, Sequence, Tuple from lex import Lexeme
from parse import Action
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
class Tok(IntEnum): class Tok(IntEnum):
""" """
@@ -67,51 +73,119 @@ class Variable(IntEnum):
def __repr__(self) -> str: def __repr__(self) -> str:
return f'<{self._name_}>' return f'<{self._name_}>'
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [ ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
@dataclass(frozen=True)
class ASTNegated:
    """AST node representing the logical negation of a single term."""
    # The sub-term being negated; may itself be a negation or a proposition.
    term: ASTTerm

    def __str__(self) -> str:
        """Render as the negation symbol immediately followed by the inner term."""
        return '¬' + str(self.term)
@dataclass(frozen=True)
class ASTProp:
    """AST node for a proposition or function application: an identifier plus arguments."""
    # Lexeme carrying the identifier's matched text (and source position).
    ident: Lexeme[Tok]
    # Zero or more argument terms; empty for a bare identifier.
    arguments: Sequence[ASTTerm]

    def __str__(self) -> str:
        """Render as `name(arg1,arg2,...)`, or just `name` when there are no arguments."""
        name = self.ident.matched_string
        if not self.arguments:
            return name
        rendered_args = ','.join(str(argument) for argument in self.arguments)
        return f'{name}({rendered_args})'
@dataclass(frozen=True)
class AST:
    """Root of a parsed document: the declared identifier sections plus the clauses."""
    predicate_idents: Sequence[Lexeme[Tok]]
    variable_idents: Sequence[Lexeme[Tok]]
    const_idents: Sequence[Lexeme[Tok]]
    func_idents: Sequence[Lexeme[Tok]]
    # Each clause is a sequence of terms, rendered as a disjunction ("or").
    clauses: Sequence[Sequence[ASTTerm]]

    def __str__(self) -> str:
        """Human-readable dump: one line per identifier section, then the clauses."""
        def section(lexemes: Sequence[Lexeme[Tok]]) -> str:
            # Show only the matched text of each identifier lexeme.
            return repr([lexeme.matched_string for lexeme in lexemes])
        clause_text = '\n'.join(
            ' or '.join(str(term) for term in clause)
            for clause in self.clauses
        )
        return (
            'Predicates: ' + section(self.predicate_idents) + '\n'
            + 'Variables: ' + section(self.variable_idents) + '\n'
            + 'Constants: ' + section(self.const_idents) + '\n'
            + 'Functions: ' + section(self.func_idents) + '\n'
            + 'Clauses:\n' + clause_text + '\n'
        )
def cons(stack: Sequence[Any]) -> Sequence[Any]:
match stack:
case [rest, head, *popped_stack]:
return ((head, *rest), *popped_stack)
case bad_stack:
raise Exception("Unexpected stack state!", bad_stack)
nil: Sequence[Any] = tuple()
@cur2
def introduce(
        value: Any,
        stack: Sequence[Any]
) -> Sequence[Any]:
    """Semantic action factory: `introduce(x)` yields an action pushing `x`.

    Curried via `cur2`, so partially applying the first argument produces a
    stack -> stack function usable directly inside the grammar table
    (e.g. `introduce(nil)` pushes an empty sequence).
    """
    # Renamed from `cons` — the original parameter name shadowed the
    # module-level `cons` semantic action.
    return (value, *stack)
def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
match stack:
case [arg, func, *popped_stack] if hasattr(func, '__call__'):
return (func(arg), *popped_stack)
raise Exception("Unexpected stack state!", stack)
@cur2
def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
match stack:
case [arg, *popped_stack]:
return (func(arg), *popped_stack)
case bad_stack:
raise Exception("Unexpected stack state!", bad_stack)
def drop(stack: Sequence[Any]) -> Sequence[Any]:
    """Semantic action: discard the top element of the stack.

    Used in the grammar after tokens that carry no semantic content
    (section headers, newlines, parentheses, commas).
    """
    # Slicing preserves the concrete sequence type (tuple in, tuple out).
    return stack[1:]
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
(Variable.Start, (Variable.Start,
[ Tok.PredicateSection, Variable.Idents, Tok.Newline [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,p,AST)), Tok.Newline, drop
, Tok.VariablesSection, Variable.Idents, Tok.Newline , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
, Tok.ConstantsSection, Variable.Idents, Tok.Newline , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
, Tok.FunctionsSection, Variable.Idents, Tok.Newline , Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
, Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ), , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),
(Variable.Idents, (Variable.Idents,
[ Tok.Identifier, Variable.Idents ]), [ Tok.Identifier, Variable.Idents, cons ]),
(Variable.Idents, (Variable.Idents,
[ ]), [ introduce(nil) ]),
(Variable.Clauses, (Variable.Clauses,
[ Tok.Newline, Variable.Clauses_ ]), [ Tok.Newline, drop, Variable.Clauses_ ]),
(Variable.Clauses, (Variable.Clauses,
[ ]), [ introduce(nil) ]),
(Variable.Clauses_, (Variable.Clauses_,
[ Variable.Clause, Variable.Clauses ]), [ Variable.Clause, Variable.Clauses, cons ]),
(Variable.Clauses_, (Variable.Clauses_,
[ ]), [ introduce(nil) ]),
(Variable.Clause, (Variable.Clause,
[ Variable.Term, Variable.Clause_ ]), [ Variable.Term, Variable.Clause_, cons ]),
(Variable.Clause_, (Variable.Clause_,
[ Variable.Clause ]), [ Variable.Clause ]),
(Variable.Clause_, (Variable.Clause_,
[ ]), [ introduce(nil) ]),
(Variable.Term, (Variable.Term,
[ Tok.Negate, Variable.Term ]), [ Tok.Negate, drop, Variable.Term, call_func(ASTNegated) ]),
(Variable.Term, (Variable.Term,
[ Tok.Identifier, Variable.Func ]), [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),
(Variable.Func, (Variable.Func,
[ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]), [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
(Variable.Func, (Variable.Func,
[ ]), [ introduce(nil) ]),
(Variable.CSTerms, (Variable.CSTerms,
[ Tok.Comma, Variable.Term, Variable.CSTerms ]), [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
(Variable.CSTerms, (Variable.CSTerms,
[ ]), [ introduce(nil) ]),
] ]
""" """
Implements the following grammar: Implements the following grammar:
@@ -147,6 +221,24 @@ CSTerms := Comma <Term> <CSTerms>
""" """
if __name__ == '__main__': if __name__ == '__main__':
from emis_funky_funktions import cur2, flip # from emis_funky_funktions import cur2, flip
from build_oracle import print_oracle_table_enum, oracle_table # from build_oracle import print_oracle_table_enum, oracle_table
print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore # print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
from build_oracle import oracle_table
from parse import parser
from lex import tokenize
with open('sample.cnf') as file:
lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
maybe_ast = parser_(lexemes)
match maybe_ast:
case Ok([ast]):
print(ast)
case Ok(huh):
print('Unexpected end result: ', huh)
case Err((Lexeme(token, text, line, col_start, col_end), expected)):
print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')

5
lex.py
View file

@@ -35,12 +35,13 @@ def try_lex1(regex: Pattern[str], tok: A, input: str, line_no: int, col_no: int)
return None return None
case match: case match:
assert match is not None assert match is not None
return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end()), input[match.end():])) return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end() - 1), input[match.end():]))
def tokenize( def tokenize(
lex_table: Collection[Tuple[Pattern[str], A]], lex_table: Collection[Tuple[Pattern[str], A]],
drop_tokens: Collection[A], drop_tokens: Collection[A],
eof_token: A,
input: str input: str
) -> Result[List[Lexeme[A]], str]: ) -> Result[List[Lexeme[A]], str]:
""" """
@@ -86,7 +87,7 @@ def tokenize(
) )
return inner(rest_input, line_no+newline_count, new_col_no, prefix) return inner(rest_input, line_no+newline_count, new_col_no, prefix)
else: else:
return Ok(prefix) return Ok(prefix + [Lexeme(eof_token, '', line_no, col_no, col_no)])
return inner(input, 1, 1, []) return inner(input, 1, 1, [])

View file

@@ -103,7 +103,10 @@ def parser(
match stack: match stack:
# A [Variable] # A [Variable]
case [top_of_stack, *popped_stack] if is_var(top_of_stack): case [top_of_stack, *popped_stack] if is_var(top_of_stack):
expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])] try:
expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
except IndexError:
raise Exception('Unexpected end of input. Expected:', _expected(oracle[top_of_stack]))
match expansions: match expansions:
case []: case []:
return Err((lexemes[0], _expected(oracle[top_of_stack]))) return Err((lexemes[0], _expected(oracle[top_of_stack])))