Get parsing for the sample document working
This commit is contained in:
parent
b8b6ba708f
commit
532a5a14d0
140
grammar.py
140
grammar.py
|
@ -6,10 +6,16 @@ oracle table for the grammar it defines. It's recommended that this be done usi
|
|||
`build_oracle.sh` instead, however, which will build a whole python module containing the
|
||||
oracle table, complete with imports.
|
||||
"""
|
||||
from emis_funky_funktions import *
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import auto, IntEnum
|
||||
from re import compile, Pattern
|
||||
|
||||
from typing import Collection, Mapping, Sequence, Tuple
|
||||
from lex import Lexeme
|
||||
from parse import Action
|
||||
|
||||
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
|
||||
|
||||
class Tok(IntEnum):
|
||||
"""
|
||||
|
@ -67,51 +73,119 @@ class Variable(IntEnum):
|
|||
def __repr__(self) -> str:
    """Render this grammar variable as an angle-bracketed nonterminal, e.g. <Start>."""
    return '<' + self._name_ + '>'
|
||||
|
||||
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
|
||||
ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ASTNegated:
|
||||
term: ASTTerm
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f'¬{self.term}'
|
||||
|
||||
@dataclass(frozen=True)
class ASTProp:
    """A proposition or function application: an identifier with argument terms."""
    # Lexeme whose matched_string is the identifier text.
    ident: Lexeme[Tok]
    # Argument terms; an empty sequence means a bare identifier.
    arguments: Sequence[ASTTerm]

    def __str__(self) -> str:
        """Render as `name(arg,arg,...)`, or just `name` when there are no arguments."""
        name = self.ident.matched_string
        if not self.arguments:
            return name
        rendered_args = ','.join(str(arg) for arg in self.arguments)
        return f'{name}({rendered_args})'
|
||||
|
||||
@dataclass(frozen=True)
class AST:
    """Parsed CNF document: the declared symbol sections plus the clause list."""
    # Identifiers declared in each header section of the input file.
    predicate_idents: Sequence[Lexeme[Tok]]
    variable_idents: Sequence[Lexeme[Tok]]
    const_idents: Sequence[Lexeme[Tok]]
    func_idents: Sequence[Lexeme[Tok]]
    # Each clause is a disjunction of terms.
    clauses: Sequence[Sequence[ASTTerm]]

    def __str__(self) -> str:
        """Human-readable summary: one line per section, then one line per clause."""
        sections = (
            ('Predicates', self.predicate_idents),
            ('Variables', self.variable_idents),
            ('Constants', self.const_idents),
            ('Functions', self.func_idents),
        )
        header = ''.join(
            f'{label}: {[i.matched_string for i in idents]!r}\n'
            for label, idents in sections
        )
        clause_text = '\n'.join(
            ' or '.join(str(term) for term in clause)
            for clause in self.clauses
        )
        return header + 'Clauses:\n' + clause_text + '\n'
|
||||
|
||||
def cons(stack: Sequence[Any]) -> Sequence[Any]:
|
||||
match stack:
|
||||
case [rest, head, *popped_stack]:
|
||||
return ((head, *rest), *popped_stack)
|
||||
case bad_stack:
|
||||
raise Exception("Unexpected stack state!", bad_stack)
|
||||
|
||||
# The empty list, used as the seed value for list-building grammar actions.
nil: Sequence[Any] = ()

# NOTE(review): assumes `cur2` (from emis_funky_funktions) curries this
# two-argument function so `introduce(value)` yields a one-argument stack
# action — confirm against that library.
@cur2
def introduce(
    cons: Any,
    stack: Sequence[Any]
) -> Sequence[Any]:
    """Parser action: push the constant `cons` onto the stack unchanged.

    (The parameter name shadows the module-level `cons` function; it is a
    constant value here, not the action above.)
    """
    return (cons,) + tuple(stack)
|
||||
|
||||
def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
|
||||
match stack:
|
||||
case [arg, func, *popped_stack] if hasattr(func, '__call__'):
|
||||
return (func(arg), *popped_stack)
|
||||
raise Exception("Unexpected stack state!", stack)
|
||||
@cur2
|
||||
def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
|
||||
match stack:
|
||||
case [arg, *popped_stack]:
|
||||
return (func(arg), *popped_stack)
|
||||
case bad_stack:
|
||||
raise Exception("Unexpected stack state!", bad_stack)
|
||||
|
||||
def drop(stack: Sequence[Any]) -> Sequence[Any]:
    """Parser action: discard the top element of the stack.

    Returns an empty sequence unchanged rather than raising.
    """
    return stack[1:len(stack)]
|
||||
|
||||
# NOTE(review): this span was reconstructed from a diff rendering that
# interleaved the pre- and post-change rule bodies; only the post-change
# rules are kept. Verify against the upstream revision.
#
# LL(1) grammar with semantic actions interleaved among the symbols:
#   drop            — discard a matched literal token from the value stack
#   cons / introduce(nil) — build up sequences (idents, clauses, args)
#   call_func / f_apply   — construct and apply AST node builders
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
    (Variable.Start,
        [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,p,AST)), Tok.Newline, drop
        , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
        , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
        , Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
        , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop ] ),

    (Variable.Idents,
        [ Tok.Identifier, Variable.Idents, cons ]),
    (Variable.Idents,
        [ introduce(nil) ]),

    (Variable.Clauses,
        [ Tok.Newline, drop, Variable.Clauses_ ]),
    (Variable.Clauses,
        [ introduce(nil) ]),

    (Variable.Clauses_,
        [ Variable.Clause, Variable.Clauses, cons ]),
    (Variable.Clauses_,
        [ introduce(nil) ]),

    (Variable.Clause,
        [ Variable.Term, Variable.Clause_, cons ]),

    (Variable.Clause_,
        [ Variable.Clause ]),
    (Variable.Clause_,
        [ introduce(nil) ]),

    (Variable.Term,
        [ Tok.Negate, drop, Variable.Term, call_func(ASTNegated) ]),
    (Variable.Term,
        [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),

    (Variable.Func,
        [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
    (Variable.Func,
        [ introduce(nil) ]),

    (Variable.CSTerms,
        [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
    (Variable.CSTerms,
        [ introduce(nil) ]),
]
|
||||
"""
|
||||
Implements the following grammar:
|
||||
|
@ -147,6 +221,24 @@ CSTerms := Comma <Term> <CSTerms>
|
|||
"""
|
||||
|
||||
if __name__ == '__main__':
|
||||
from emis_funky_funktions import cur2, flip
|
||||
from build_oracle import print_oracle_table_enum, oracle_table
|
||||
print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
|
||||
# from emis_funky_funktions import cur2, flip
|
||||
# from build_oracle import print_oracle_table_enum, oracle_table
|
||||
# print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
|
||||
from build_oracle import oracle_table
|
||||
from parse import parser
|
||||
from lex import tokenize
|
||||
|
||||
with open('sample.cnf') as file:
|
||||
lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
|
||||
|
||||
oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
|
||||
parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
|
||||
maybe_ast = parser_(lexemes)
|
||||
|
||||
match maybe_ast:
|
||||
case Ok([ast]):
|
||||
print(ast)
|
||||
case Ok(huh):
|
||||
print('Unexpected end result: ', huh)
|
||||
case Err((Lexeme(token, text, line, col_start, col_end), expected)):
|
||||
print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
|
5
lex.py
5
lex.py
|
@ -35,12 +35,13 @@ def try_lex1(regex: Pattern[str], tok: A, input: str, line_no: int, col_no: int)
|
|||
return None
|
||||
case match:
|
||||
assert match is not None
|
||||
return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end()), input[match.end():]))
|
||||
return Some((Lexeme(tok, match.group(), line_no, col_no, col_no + match.end() - 1), input[match.end():]))
|
||||
|
||||
|
||||
def tokenize(
|
||||
lex_table: Collection[Tuple[Pattern[str], A]],
|
||||
drop_tokens: Collection[A],
|
||||
eof_token: A,
|
||||
input: str
|
||||
) -> Result[List[Lexeme[A]], str]:
|
||||
"""
|
||||
|
@ -86,7 +87,7 @@ def tokenize(
|
|||
)
|
||||
return inner(rest_input, line_no+newline_count, new_col_no, prefix)
|
||||
else:
|
||||
return Ok(prefix)
|
||||
return Ok(prefix + [Lexeme(eof_token, '', line_no, col_no, col_no)])
|
||||
return inner(input, 1, 1, [])
|
||||
|
||||
|
||||
|
|
5
parse.py
5
parse.py
|
@ -103,7 +103,10 @@ def parser(
|
|||
match stack:
|
||||
# A [Variable]
|
||||
case [top_of_stack, *popped_stack] if is_var(top_of_stack):
|
||||
expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
|
||||
try:
|
||||
expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
|
||||
except IndexError:
|
||||
raise Exception('Unexpected end of input. Expected:', _expected(oracle[top_of_stack]))
|
||||
match expansions:
|
||||
case []:
|
||||
return Err((lexemes[0], _expected(oracle[top_of_stack])))
|
||||
|
|
Loading…
Reference in a new issue