Get parsing for the sample document working

This commit is contained in:
Emi Simpson 2023-03-04 22:02:16 -05:00
parent b8b6ba708f
commit 532a5a14d0
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847
3 changed files with 123 additions and 27 deletions

View file

@@ -6,10 +6,16 @@ oracle table for the grammar it defines. It's recommended that this be done usi
`build_oracle.sh` instead, however, which will build a whole python module containing the `build_oracle.sh` instead, however, which will build a whole python module containing the
oracle table, complete with imports. oracle table, complete with imports.
""" """
from emis_funky_funktions import *
from dataclasses import dataclass
from enum import auto, IntEnum from enum import auto, IntEnum
from re import compile, Pattern from re import compile, Pattern
from typing import Collection, Mapping, Sequence, Tuple from lex import Lexeme
from parse import Action
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
class Tok(IntEnum): class Tok(IntEnum):
""" """
@@ -67,51 +73,119 @@ class Variable(IntEnum):
def __repr__(self) -> str: def __repr__(self) -> str:
return f'<{self._name_}>' return f'<{self._name_}>'
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [ ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
@dataclass(frozen=True)
class ASTNegated:
    """AST node representing the logical negation of a single term."""
    # The sub-term being negated; may itself be a negation or a proposition.
    term: ASTTerm

    def __str__(self) -> str:
        """Render as the negation symbol immediately followed by the inner term."""
        return '¬' + str(self.term)
@dataclass(frozen=True)
class ASTProp:
    """AST node for a proposition or function application: an identifier plus arguments."""
    # Lexeme carrying the identifier's matched text (and source position).
    ident: Lexeme[Tok]
    # Zero or more argument terms; empty for a bare identifier.
    arguments: Sequence[ASTTerm]

    def __str__(self) -> str:
        """Render as `name(arg1,arg2,...)`, or just `name` when there are no arguments."""
        name = self.ident.matched_string
        if not self.arguments:
            return name
        rendered_args = ','.join(str(argument) for argument in self.arguments)
        return f'{name}({rendered_args})'
@dataclass(frozen=True)
class AST:
    """Root of a parsed document: the declared identifier sections plus the clauses."""
    predicate_idents: Sequence[Lexeme[Tok]]
    variable_idents: Sequence[Lexeme[Tok]]
    const_idents: Sequence[Lexeme[Tok]]
    func_idents: Sequence[Lexeme[Tok]]
    # Each clause is a sequence of terms, rendered as a disjunction ("or").
    clauses: Sequence[Sequence[ASTTerm]]

    def __str__(self) -> str:
        """Human-readable dump: one line per identifier section, then the clauses."""
        def section(lexemes: Sequence[Lexeme[Tok]]) -> str:
            # Show only the matched text of each identifier lexeme.
            return repr([lexeme.matched_string for lexeme in lexemes])
        clause_text = '\n'.join(
            ' or '.join(str(term) for term in clause)
            for clause in self.clauses
        )
        return (
            'Predicates: ' + section(self.predicate_idents) + '\n'
            + 'Variables: ' + section(self.variable_idents) + '\n'
            + 'Constants: ' + section(self.const_idents) + '\n'
            + 'Functions: ' + section(self.func_idents) + '\n'
            + 'Clauses:\n' + clause_text + '\n'
        )
def cons(stack: Sequence[Any]) -> Sequence[Any]:
match stack:
case [rest, head, *popped_stack]:
return ((head, *rest), *popped_stack)
case bad_stack:
raise Exception("Unexpected stack state!", bad_stack)
nil: Sequence[Any] = tuple()
@cur2
def introduce(
        value: Any,
        stack: Sequence[Any]
) -> Sequence[Any]:
    """Semantic action factory: `introduce(x)` yields an action pushing `x`.

    Curried via `cur2`, so partially applying the first argument produces a
    stack -> stack function usable directly inside the grammar table
    (e.g. `introduce(nil)` pushes an empty sequence).
    """
    # Renamed from `cons` — the original parameter name shadowed the
    # module-level `cons` semantic action.
    return (value, *stack)
def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
match stack:
case [arg, func, *popped_stack] if hasattr(func, '__call__'):
return (func(arg), *popped_stack)
raise Exception("Unexpected stack state!", stack)
@cur2
def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
match stack:
case [arg, *popped_stack]:
return (func(arg), *popped_stack)
case bad_stack:
raise Exception("Unexpected stack state!", bad_stack)
def drop(stack: Sequence[Any]) -> Sequence[Any]:
    """Semantic action: discard the top element of the stack.

    Used in the grammar after tokens that carry no semantic content
    (section headers, newlines, parentheses, commas).
    """
    # Slicing preserves the concrete sequence type (tuple in, tuple out).
    return stack[1:]
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
(Variable.Start, (Variable.Start,
[ Tok.PredicateSection, Variable.Idents, Tok.Newline [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,p,AST)), Tok.Newline, drop
, Tok.VariablesSection, Variable.Idents, Tok.Newline , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
, Tok.ConstantsSection, Variable.Idents, Tok.Newline , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
, Tok.FunctionsSection, Variable.Idents, Tok.Newline , Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
, Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ), , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),
(Variable.Idents, (Variable.Idents,
[ Tok.Identifier, Variable.Idents ]), [ Tok.Identifier, Variable.Idents, cons ]),
(Variable.Idents, (Variable.Idents,
[ ]), [ introduce(nil) ]),
(Variable.Clauses, (Variable.Clauses,
[ Tok.Newline, Variable.Clauses_ ]), [ Tok.Newline, drop, Variable.Clauses_ ]),
(Variable.Clauses, (Variable.Clauses,
[ ]), [ introduce(nil) ]),
(Variable.Clauses_, (Variable.Clauses_,
[ Variable.Clause, Variable.Clauses ]), [ Variable.Clause, Variable.Clauses, cons ]),
(Variable.Clauses_, (Variable.Clauses_,
[ ]), [ introduce(nil) ]),
(Variable.Clause, (Variable.Clause,
[ Variable.Term, Variable.Clause_ ]), [ Variable.Term, Variable.Clause_, cons ]),
(Variable.Clause_, (Variable.Clause_,
[ Variable.Clause ]), [ Variable.Clause ]),
(Variable.Clause_, (Variable.Clause_,
[ ]), [ introduce(nil) ]),
(Variable.Term, (Variable.Term,
[ Tok.Negate, Variable.Term ]), [ Tok.Negate, drop, Variable.Term, call_func(ASTNegated) ]),
(Variable.Term, (Variable.Term,
[ Tok.Identifier, Variable.Func ]), [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),
(Variable.Func, (Variable.Func,
[ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]), [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
(Variable.Func, (Variable.Func,
[ ]), [ introduce(nil) ]),
(Variable.CSTerms, (Variable.CSTerms,
[ Tok.Comma, Variable.Term, Variable.CSTerms ]), [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
(Variable.CSTerms, (Variable.CSTerms,
[ ]), [ introduce(nil) ]),
] ]
""" """
Implements the following grammar: Implements the following grammar:
@@ -147,6 +221,24 @@ CSTerms := Comma <Term> <CSTerms>
""" """
if __name__ == '__main__': if __name__ == '__main__':
from emis_funky_funktions import cur2, flip # from emis_funky_funktions import cur2, flip
from build_oracle import print_oracle_table_enum, oracle_table # from build_oracle import print_oracle_table_enum, oracle_table
print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore # print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
from build_oracle import oracle_table
from parse import parser
from lex import tokenize
with open('sample.cnf') as file:
lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
maybe_ast = parser_(lexemes)
match maybe_ast:
case Ok([ast]):
print(ast)
case Ok(huh):
print('Unexpected end result: ', huh)
case Err((Lexeme(token, text, line, col_start, col_end), expected)):
print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')

5
lex.py
View file

@@ -35,12 +35,13 @@ def try_lex1(regex: Pattern[str], tok: A, input: str, line_no: int, col_no: int)
return None return None
case match: case match:
assert match is not None assert match is not None
return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end()), input[match.end():])) return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end() - 1), input[match.end():]))
def tokenize( def tokenize(
lex_table: Collection[Tuple[Pattern[str], A]], lex_table: Collection[Tuple[Pattern[str], A]],
drop_tokens: Collection[A], drop_tokens: Collection[A],
eof_token: A,
input: str input: str
) -> Result[List[Lexeme[A]], str]: ) -> Result[List[Lexeme[A]], str]:
""" """
@@ -86,7 +87,7 @@ def tokenize(
) )
return inner(rest_input, line_no+newline_count, new_col_no, prefix) return inner(rest_input, line_no+newline_count, new_col_no, prefix)
else: else:
return Ok(prefix) return Ok(prefix + [Lexeme(eof_token, '', line_no, col_no, col_no)])
return inner(input, 1, 1, []) return inner(input, 1, 1, [])

View file

@@ -103,7 +103,10 @@ def parser(
match stack: match stack:
# A [Variable] # A [Variable]
case [top_of_stack, *popped_stack] if is_var(top_of_stack): case [top_of_stack, *popped_stack] if is_var(top_of_stack):
expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])] try:
expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
except IndexError:
raise Exception('Unexpected end of input. Expected:', _expected(oracle[top_of_stack]))
match expansions: match expansions:
case []: case []:
return Err((lexemes[0], _expected(oracle[top_of_stack]))) return Err((lexemes[0], _expected(oracle[top_of_stack])))