Get parsing for the sample document working

Emi Simpson 2023-03-04 22:02:16 -05:00
parent b8b6ba708f
commit 532a5a14d0
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847
3 changed files with 123 additions and 27 deletions


@@ -6,10 +6,16 @@ oracle table for the grammar it defines. It's recommended that this be done usi
 `build_oracle.sh` instead, however, which will build a whole python module containing the
 oracle table, complete with imports.
 """
 from emis_funky_funktions import *

 from dataclasses import dataclass
 from enum import auto, IntEnum
 from re import compile, Pattern
-from typing import Collection, Mapping, Sequence, Tuple
+from lex import Lexeme
+from parse import Action
+from typing import Any, Callable, Collection, Mapping, Sequence, Tuple

 class Tok(IntEnum):
     """
@@ -67,51 +73,119 @@ class Variable(IntEnum):
     def __repr__(self) -> str:
         return f'<{self._name_}>'
-GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
+ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
+
+@dataclass(frozen=True)
+class ASTNegated:
+    term: ASTTerm
+
+    def __str__(self) -> str:
+        return f'¬{self.term}'
+
+@dataclass(frozen=True)
+class ASTProp:
+    ident: Lexeme[Tok]
+    arguments: Sequence[ASTTerm]
+
+    def __str__(self) -> str:
+        if len(self.arguments):
+            return f'{self.ident.matched_string}({",".join(map(str, self.arguments))})'
+        else:
+            return self.ident.matched_string
+
+@dataclass(frozen=True)
+class AST:
+    predicate_idents: Sequence[Lexeme[Tok]]
+    variable_idents: Sequence[Lexeme[Tok]]
+    const_idents: Sequence[Lexeme[Tok]]
+    func_idents: Sequence[Lexeme[Tok]]
+    clauses: Sequence[Sequence[ASTTerm]]
+
+    def __str__(self) -> str:
+        return (
+            'Predicates: ' + repr([i.matched_string for i in self.predicate_idents]) + '\n' +
+            'Variables: ' + repr([i.matched_string for i in self.variable_idents]) + '\n' +
+            'Constants: ' + repr([i.matched_string for i in self.const_idents]) + '\n' +
+            'Functions: ' + repr([i.matched_string for i in self.func_idents]) + '\n' +
+            'Clauses:\n' + '\n'.join(' or '.join(str(term) for term in clause) for clause in self.clauses) + '\n'
+        )
+
+def cons(stack: Sequence[Any]) -> Sequence[Any]:
+    match stack:
+        case [rest, head, *popped_stack]:
+            return ((head, *rest), *popped_stack)
+        case bad_stack:
+            raise Exception("Unexpected stack state!", bad_stack)
+
+nil: Sequence[Any] = tuple()
+
+@cur2
+def introduce(
+    cons: Any,
+    stack: Sequence[Any]
+) -> Sequence[Any]:
+    return (cons, *stack)
+
+def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
+    match stack:
+        case [arg, func, *popped_stack] if hasattr(func, '__call__'):
+            return (func(arg), *popped_stack)
+    raise Exception("Unexpected stack state!", stack)
+
+@cur2
+def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
+    match stack:
+        case [arg, *popped_stack]:
+            return (func(arg), *popped_stack)
+        case bad_stack:
+            raise Exception("Unexpected stack state!", bad_stack)
+
+def drop(stack: Sequence[Any]) -> Sequence[Any]:
+    return stack[1:]
+
+GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
     (Variable.Start,
-        [ Tok.PredicateSection, Variable.Idents, Tok.Newline
-        , Tok.VariablesSection, Variable.Idents, Tok.Newline
-        , Tok.ConstantsSection, Variable.Idents, Tok.Newline
-        , Tok.FunctionsSection, Variable.Idents, Tok.Newline
-        , Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ),
+        [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,p,AST)), Tok.Newline, drop
+        , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+        , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+        , Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+        , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),
     (Variable.Idents,
-        [ Tok.Identifier, Variable.Idents ]),
+        [ Tok.Identifier, Variable.Idents, cons ]),
     (Variable.Idents,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Clauses,
-        [ Tok.Newline, Variable.Clauses_ ]),
+        [ Tok.Newline, drop, Variable.Clauses_ ]),
     (Variable.Clauses,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Clauses_,
-        [ Variable.Clause, Variable.Clauses ]),
+        [ Variable.Clause, Variable.Clauses, cons ]),
     (Variable.Clauses_,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Clause,
-        [ Variable.Term, Variable.Clause_ ]),
+        [ Variable.Term, Variable.Clause_, cons ]),
     (Variable.Clause_,
         [ Variable.Clause ]),
     (Variable.Clause_,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Term,
-        [ Tok.Negate, Variable.Term ]),
+        [ Tok.Negate, drop, Variable.Term, call_func(ASTNegated) ]),
     (Variable.Term,
-        [ Tok.Identifier, Variable.Func ]),
+        [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),
     (Variable.Func,
-        [ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]),
+        [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
     (Variable.Func,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.CSTerms,
-        [ Tok.Comma, Variable.Term, Variable.CSTerms ]),
+        [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
     (Variable.CSTerms,
-        [ ]),
+        [ introduce(nil) ]),
 ]
 """
 Implements the following grammar:
@@ -147,6 +221,24 @@ CSTerms := Comma <Term> <CSTerms>
 """

 if __name__ == '__main__':
-    from emis_funky_funktions import cur2, flip
-    from build_oracle import print_oracle_table_enum, oracle_table
-    print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
+    # from emis_funky_funktions import cur2, flip
+    # from build_oracle import print_oracle_table_enum, oracle_table
+    # print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
+    from build_oracle import oracle_table
+    from parse import parser
+    from lex import tokenize
+
+    with open('sample.cnf') as file:
+        lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
+
+    oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
+    parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
+    maybe_ast = parser_(lexemes)
+
+    match maybe_ast:
+        case Ok([ast]):
+            print(ast)
+        case Ok(huh):
+            print('Unexpected end result: ', huh)
+        case Err((Lexeme(token, text, line, col_start, col_end), expected)):
+            print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
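
Note: the semantic actions threaded through GRAMMAR above are plain stack-to-stack functions, interleaved with the symbols that produce the values they consume. A minimal standalone sketch of how they compose (plain Python; `cur2` is assumed to curry a two-argument function, matching how `emis_funky_funktions.cur2` is used above):

from typing import Any, Callable, Sequence

def cur2(f: Callable[[Any, Any], Any]) -> Callable[[Any], Callable[[Any], Any]]:
    # Assumed behaviour of emis_funky_funktions.cur2: curry a two-argument function
    return lambda a: lambda b: f(a, b)

nil: Sequence[Any] = tuple()

def cons(stack: Sequence[Any]) -> Sequence[Any]:
    # Top of stack: the list built so far; beneath it: the element to prepend
    rest, head, *popped_stack = stack
    return ((head, *rest), *popped_stack)

introduce = cur2(lambda c, stack: (c, *stack))  # push a constant onto the stack

def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
    # Top of stack: an argument; beneath it: a function to apply to it
    arg, func, *popped_stack = stack
    return (func(arg), *popped_stack)

# Replay what the Idents rules do: shift three identifiers, push nil for the
# empty production, then let the three pending `cons` actions unwind.
stack: Sequence[Any] = ()
for ident in ('a', 'b', 'c'):
    stack = (ident, *stack)          # ('c', 'b', 'a')
stack = introduce(nil)(stack)        # ((), 'c', 'b', 'a')
stack = cons(cons(cons(stack)))      # (('a', 'b', 'c'),) -- source order restored
print(stack)

# f_apply pairs an argument with a curried constructor already on the stack
print(f_apply((3, lambda n: n + 1)))  # (4,)

Because each recursive Idents production ends in `cons` and the empty production pushes `nil`, the identifier list comes back together in source order as the recursion unwinds.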

lex.py

@@ -35,12 +35,13 @@ def try_lex1(regex: Pattern[str], tok: A, input: str, line_no: int, col_no: int)
             return None
         case match:
             assert match is not None
-            return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end()), input[match.end():]))
+            return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end() - 1), input[match.end():]))

 def tokenize(
     lex_table: Collection[Tuple[Pattern[str], A]],
     drop_tokens: Collection[A],
+    eof_token: A,
     input: str
 ) -> Result[List[Lexeme[A]], str]:
     """
@@ -86,7 +87,7 @@ def tokenize(
             )
             return inner(rest_input, line_no+newline_count, new_col_no, prefix)
         else:
-            return Ok(prefix)
+            return Ok(prefix + [Lexeme(eof_token, '', line_no, col_no, col_no)])
     return inner(input, 1, 1, [])
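
Taken together, the two lex.py changes make end columns inclusive and guarantee a trailing EOF lexeme on every successful tokenize. A hypothetical call (the token names and patterns here are invented for illustration; the real LEX_TABLE lives alongside Tok):

from re import compile
from lex import tokenize

# Hypothetical two-token table; whitespace is dropped, 'EOF' marks end of input
LEX_TABLE = [(compile(r'\s+'), 'WS'), (compile(r'[a-z]+'), 'WORD')]

result = tokenize(LEX_TABLE, ['WS'], 'EOF', 'foo bar')
# Expected shape of the result, given the two changes above:
#   Ok([Lexeme('WORD', 'foo', 1, 1, 3),   <- end column now inclusive (the -1 fix)
#       Lexeme('WORD', 'bar', 1, 5, 7),
#       Lexeme('EOF',  '',    1, 8, 8)])  <- EOF lexeme appended at end of input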

parse.py

@@ -103,7 +103,10 @@ def parser(
         match stack:
             # A [Variable]
             case [top_of_stack, *popped_stack] if is_var(top_of_stack):
-                expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
+                try:
+                    expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
+                except IndexError:
+                    raise Exception('Unexpected end of input. Expected:', _expected(oracle[top_of_stack]))
                 match expansions:
                     case []:
                         return Err((lexemes[0], _expected(oracle[top_of_stack])))
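
This guard converts the bare IndexError that previously escaped `parser` into a diagnostic naming the expected tokens. With lex.py now appending an EOF lexeme, a well-formed stream should end at Tok.Eof before `lexemes` runs dry, so this is a backstop for malformed streams. A toy illustration of the failure mode (hypothetical oracle and token names):

# What the old code did when input ran out while a variable was still on the stack
oracle = {'Start': {'Identifier': []}}
lexemes: list = []
try:
    lexemes[0]  # old behaviour: bare IndexError with no context
except IndexError:
    print('Unexpected end of input. Expected:', list(oracle['Start'].keys()))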