Get parsing for the sample document working

Emi Simpson 2023-03-04 22:02:16 -05:00
parent b8b6ba708f
commit 532a5a14d0
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847
3 changed files with 123 additions and 27 deletions


@@ -6,10 +6,16 @@ oracle table for the grammar it defines. It's recommended that this be done usi
 `build_oracle.sh` instead, however, which will build a whole python module containing the
 oracle table, complete with imports.
 """
 from emis_funky_funktions import *

 from dataclasses import dataclass
 from enum import auto, IntEnum
 from re import compile, Pattern
-from typing import Collection, Mapping, Sequence, Tuple
+from lex import Lexeme
+from parse import Action
+from typing import Any, Callable, Collection, Mapping, Sequence, Tuple

 class Tok(IntEnum):
     """
@@ -67,51 +73,119 @@ class Variable(IntEnum):
     def __repr__(self) -> str:
         return f'<{self._name_}>'
-GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
+ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
+
+@dataclass(frozen=True)
+class ASTNegated:
+    term: ASTTerm
+
+    def __str__(self) -> str:
+        return f'¬{self.term}'
+
+@dataclass(frozen=True)
+class ASTProp:
+    ident: Lexeme[Tok]
+    arguments: Sequence[ASTTerm]
+
+    def __str__(self) -> str:
+        if len(self.arguments):
+            return f'{self.ident.matched_string}({",".join(map(str, self.arguments))})'
+        else:
+            return self.ident.matched_string
+
+@dataclass(frozen=True)
+class AST:
+    predicate_idents: Sequence[Lexeme[Tok]]
+    variable_idents: Sequence[Lexeme[Tok]]
+    const_idents: Sequence[Lexeme[Tok]]
+    func_idents: Sequence[Lexeme[Tok]]
+    clauses: Sequence[Sequence[ASTTerm]]
+
+    def __str__(self) -> str:
+        return (
+            'Predicates: ' + repr([i.matched_string for i in self.predicate_idents]) + '\n' +
+            'Variables: ' + repr([i.matched_string for i in self.variable_idents]) + '\n' +
+            'Constants: ' + repr([i.matched_string for i in self.const_idents]) + '\n' +
+            'Functions: ' + repr([i.matched_string for i in self.func_idents]) + '\n' +
+            'Clauses:\n' + '\n'.join(' or '.join(str(term) for term in clause) for clause in self.clauses) + '\n'
+        )
+
+def cons(stack: Sequence[Any]) -> Sequence[Any]:
+    match stack:
+        case [rest, head, *popped_stack]:
+            return ((head, *rest), *popped_stack)
+        case bad_stack:
+            raise Exception("Unexpected stack state!", bad_stack)
+
+nil: Sequence[Any] = tuple()
+
+@cur2
+def introduce(
+    cons: Any,
+    stack: Sequence[Any]
+) -> Sequence[Any]:
+    return (cons, *stack)
+
+def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
+    match stack:
+        case [arg, func, *popped_stack] if hasattr(func, '__call__'):
+            return (func(arg), *popped_stack)
+    raise Exception("Unexpected stack state!", stack)
+
+@cur2
+def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
+    match stack:
+        case [arg, *popped_stack]:
+            return (func(arg), *popped_stack)
+        case bad_stack:
+            raise Exception("Unexpected stack state!", bad_stack)
+
+def drop(stack: Sequence[Any]) -> Sequence[Any]:
+    return stack[1:]
+
+GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
     (Variable.Start,
-        [ Tok.PredicateSection, Variable.Idents, Tok.Newline
-        , Tok.VariablesSection, Variable.Idents, Tok.Newline
-        , Tok.ConstantsSection, Variable.Idents, Tok.Newline
-        , Tok.FunctionsSection, Variable.Idents, Tok.Newline
-        , Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ),
+        [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,p,AST)), Tok.Newline, drop
+        , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+        , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+        , Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+        , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),
     (Variable.Idents,
-        [ Tok.Identifier, Variable.Idents ]),
+        [ Tok.Identifier, Variable.Idents, cons ]),
     (Variable.Idents,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Clauses,
-        [ Tok.Newline, Variable.Clauses_ ]),
+        [ Tok.Newline, drop, Variable.Clauses_ ]),
     (Variable.Clauses,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Clauses_,
-        [ Variable.Clause, Variable.Clauses ]),
+        [ Variable.Clause, Variable.Clauses, cons ]),
     (Variable.Clauses_,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Clause,
-        [ Variable.Term, Variable.Clause_ ]),
+        [ Variable.Term, Variable.Clause_, cons ]),
     (Variable.Clause_,
         [ Variable.Clause ]),
     (Variable.Clause_,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.Term,
-        [ Tok.Negate, Variable.Term ]),
+        [ Tok.Negate, drop, Variable.Term, call_func(ASTNegated) ]),
     (Variable.Term,
-        [ Tok.Identifier, Variable.Func ]),
+        [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),
     (Variable.Func,
-        [ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]),
+        [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
     (Variable.Func,
-        [ ]),
+        [ introduce(nil) ]),
     (Variable.CSTerms,
-        [ Tok.Comma, Variable.Term, Variable.CSTerms ]),
+        [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
     (Variable.CSTerms,
-        [ ]),
+        [ introduce(nil) ]),
 ]
 """
 Implements the following grammar:
@@ -147,6 +221,24 @@ CSTerms := Comma <Term> <CSTerms>
 """

 if __name__ == '__main__':
-    from emis_funky_funktions import cur2, flip
-    from build_oracle import print_oracle_table_enum, oracle_table
-    print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
+    # from emis_funky_funktions import cur2, flip
+    # from build_oracle import print_oracle_table_enum, oracle_table
+    # print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
+    from build_oracle import oracle_table
+    from parse import parser
+    from lex import tokenize
+
+    with open('sample.cnf') as file:
+        lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
+
+    oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
+    parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
+    maybe_ast = parser_(lexemes)
+
+    match maybe_ast:
+        case Ok([ast]):
+            print(ast)
+        case Ok(huh):
+            print('Unexpected end result: ', huh)
+        case Err((Lexeme(token, text, line, col_start, col_end), expected)):
+            print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
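
Note: the semantic actions threaded through GRAMMAR above are plain stack-to-stack functions, interleaved with the symbols that produce the values they consume. A minimal standalone sketch of how they compose (plain Python; `cur2` is assumed to curry a two-argument function, matching how `emis_funky_funktions.cur2` is used above):

from typing import Any, Callable, Sequence

def cur2(f: Callable[[Any, Any], Any]) -> Callable[[Any], Callable[[Any], Any]]:
    # Assumed behaviour of emis_funky_funktions.cur2: curry a two-argument function
    return lambda a: lambda b: f(a, b)

nil: Sequence[Any] = tuple()

def cons(stack: Sequence[Any]) -> Sequence[Any]:
    # Top of stack: the list built so far; beneath it: the element to prepend
    rest, head, *popped_stack = stack
    return ((head, *rest), *popped_stack)

introduce = cur2(lambda c, stack: (c, *stack))  # push a constant onto the stack

def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
    # Top of stack: an argument; beneath it: a function to apply to it
    arg, func, *popped_stack = stack
    return (func(arg), *popped_stack)

# Replay what the Idents rules do: shift three identifiers, push nil for the
# empty production, then let the three pending `cons` actions unwind.
stack: Sequence[Any] = ()
for ident in ('a', 'b', 'c'):
    stack = (ident, *stack)          # ('c', 'b', 'a')
stack = introduce(nil)(stack)        # ((), 'c', 'b', 'a')
stack = cons(cons(cons(stack)))      # (('a', 'b', 'c'),) -- source order restored
print(stack)

# f_apply pairs an argument with a curried constructor already on the stack
print(f_apply((3, lambda n: n + 1)))  # (4,)

Because each recursive Idents production ends in `cons` and the empty production pushes `nil`, the identifier list comes back together in source order as the recursion unwinds.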

lex.py

@@ -35,12 +35,13 @@ def try_lex1(regex: Pattern[str], tok: A, input: str, line_no: int, col_no: int)
             return None
         case match:
             assert match is not None
-            return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end()), input[match.end():]))
+            return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end() - 1), input[match.end():]))

 def tokenize(
     lex_table: Collection[Tuple[Pattern[str], A]],
     drop_tokens: Collection[A],
+    eof_token: A,
     input: str
 ) -> Result[List[Lexeme[A]], str]:
     """
@@ -86,7 +87,7 @@ def tokenize(
             )
             return inner(rest_input, line_no+newline_count, new_col_no, prefix)
         else:
-            return Ok(prefix)
+            return Ok(prefix + [Lexeme(eof_token, '', line_no, col_no, col_no)])
     return inner(input, 1, 1, [])
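
Taken together, the two lex.py changes make end columns inclusive and guarantee a trailing EOF lexeme on every successful tokenize. A hypothetical call (the token names and patterns here are invented for illustration; the real LEX_TABLE lives alongside Tok):

from re import compile
from lex import tokenize

# Hypothetical two-token table; whitespace is dropped, 'EOF' marks end of input
LEX_TABLE = [(compile(r'\s+'), 'WS'), (compile(r'[a-z]+'), 'WORD')]

result = tokenize(LEX_TABLE, ['WS'], 'EOF', 'foo bar')
# Expected shape of the result, given the two changes above:
#   Ok([Lexeme('WORD', 'foo', 1, 1, 3),   <- end column now inclusive (the -1 fix)
#       Lexeme('WORD', 'bar', 1, 5, 7),
#       Lexeme('EOF',  '',    1, 8, 8)])  <- EOF lexeme appended at end of input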

parse.py

@@ -103,7 +103,10 @@ def parser(
         match stack:
             # A [Variable]
             case [top_of_stack, *popped_stack] if is_var(top_of_stack):
-                expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
+                try:
+                    expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
+                except IndexError:
+                    raise Exception('Unexpected end of input. Expected:', _expected(oracle[top_of_stack]))
                 match expansions:
                     case []:
                         return Err((lexemes[0], _expected(oracle[top_of_stack])))
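
This guard converts the bare IndexError that previously escaped `parser` into a diagnostic naming the expected tokens. With lex.py now appending an EOF lexeme, a well-formed stream should end at Tok.Eof before `lexemes` runs dry, so this is a backstop for malformed streams. A toy illustration of the failure mode (hypothetical oracle and token names):

# What the old code did when input ran out while a variable was still on the stack
oracle = {'Start': {'Identifier': []}}
lexemes: list = []
try:
    lexemes[0]  # old behaviour: bare IndexError with no context
except IndexError:
    print('Unexpected end of input. Expected:', list(oracle['Start'].keys()))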