Add action erasure to parser

This commit is contained in:
Emi Simpson 2023-03-04 14:55:34 -05:00
parent da35286207
commit 140fe67d5d
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847

View file

@ -11,6 +11,27 @@ from functools import cache, reduce
from operator import getitem
from typing import Any, cast, Collection, Mapping, Sequence, Set, Tuple, TypeGuard, TypeVar
def _erase_actions_h(
handle: Sequence[A | B | C],
is_not_c: Callable[[A | B | C], TypeGuard[A | B]]
) -> Sequence[A | B]:
"""
Produce an identical handle, but with all the actions removed
"""
return [i for i in handle if is_not_c(i)]
def _erase_actions(
    grammar: Sequence[Tuple[A, Sequence[A | B | C]]],
    is_not_c: Callable[[A | B | C], TypeGuard[A | B]]
) -> Sequence[Tuple[A, Sequence[A | B]]]:
    """
    Rebuild a grammar with the actions stripped out of every rule

    Each rule keeps its left-hand side and its position in the grammar.  Only the
    right-hand side changes: it is passed through `_erase_actions_h`, which discards the
    symbols that `is_not_c` rejects.
    """
    def strip_rule(rule):
        # Unpack exactly as (lhs, rhs) so a malformed rule still fails loudly
        lhs, rhs = rule
        return (lhs, _erase_actions_h(rhs, is_not_c))

    return list(map(strip_rule, grammar))
def _first(
is_term: Callable[[A | B], TypeGuard[B]],
grammar: Sequence[Tuple[A, Sequence[A | B]]],
@ -127,9 +148,10 @@ def _predict(
return first_rhs
def oracle(
is_term: Callable[[A | B], TypeGuard[B]],
grammar: Sequence[Tuple[A, Sequence[A | B]]],
) -> Callable[[A, B], Collection[Sequence[A | B]]]:
is_term: Callable[[A | B | C], TypeGuard[B]],
is_var: Callable[[A | B | C], TypeGuard[A]],
grammar: Sequence[Tuple[A, Sequence[A | B | C]]],
) -> Callable[[A, B], Collection[Sequence[A | B | C]]]:
"""
Show valid expansions of a variable based on the next terminal to be read
@ -137,7 +159,9 @@ def oracle(
The inner method constructed is memoized for your convenience.
>>> my_oracle = oracle(flip(cur2(isinstance))(Tok), GRAMMAR)
>>> is_tok = p_instance(Tok)
>>> is_var = p_instance(Variable)
>>> my_oracle = oracle(is_tok, is_var, GRAMMAR)
One valid expansion:
>>> my_oracle(Variable.Clauses_, Tok.Negate)
@ -151,21 +175,24 @@ def oracle(
>>> my_oracle(Variable.Term, Tok.Newline)
[]
"""
follow = _follow(is_term, grammar)
is_not_c: Callable[[A | B | C], TypeGuard[A | B]] = lambda x: is_term(x) or is_var(x) #type:ignore
e_grammar: Sequence[Tuple[A, Sequence[A | B]]] = _erase_actions(grammar, is_not_c)
follow = _follow(is_term, e_grammar)
@wraps(oracle)
@cache
def inner(v: A, c: B) -> Collection[Sequence[A | B]]:
def inner(v: A, c: B) -> Collection[Sequence[A | B | C]]:
return [
handle
for (lhs, handle) in grammar
if lhs == v
and c in _predict(is_term, grammar, follow, lhs, handle)
and c in _predict(is_term, e_grammar, follow, lhs, _erase_actions_h(handle, is_not_c))
]
return inner
def oracle_table(
is_term: Callable[[A | B], TypeGuard[B]],
is_var: Callable[[A | B], TypeGuard[A]],
grammar: Sequence[Tuple[A, Sequence[A | B]]],
) -> Mapping[A, Mapping[B, Collection[Sequence[A | B]]]]:
"""
@ -173,7 +200,9 @@ def oracle_table(
No significant performance benefit
>>> my_oracle_table = oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR)
>>> is_tok = p_instance(Tok)
>>> is_var = p_instance(Variable)
>>> my_oracle_table = oracle_table(is_tok, is_var, GRAMMAR)
One valid expansion:
>>> my_oracle_table[Variable.Clauses_][Tok.Negate]
@ -189,7 +218,7 @@ def oracle_table(
"""
all_variables = { lhs for (lhs, rhs) in grammar }
all_terminals = { symbol for (lhs, rhs) in grammar for symbol in rhs if is_term(symbol) }
the_oracle = oracle(is_term, grammar)
the_oracle = oracle(is_term, is_var, grammar)
return {
v: {
t: the_oracle(v, t)
@ -231,7 +260,9 @@ def print_oracle_table(
... (SimpleVariable.Term, [SimpleTerminal.Letter]),
... ]
>>> my_oracle_table = oracle_table(flip(cur2(isinstance))(SimpleTerminal), grammar)
>>> is_tok = p_instance(SimpleTerminal)
>>> is_var = p_instance(SimpleVariable)
>>> my_oracle_table = oracle_table(is_tok, is_var, grammar)
>>> rendered_oracle_table = print_oracle_table(my_oracle_table, lambda e: f'{e.__class__.__name__}.{e.name}')
>>> print(rendered_oracle_table) #doctest: +NORMALIZE_WHITESPACE
{
@ -298,7 +329,9 @@ def print_oracle_table_enum(
... (SimpleVariable.Term, [SimpleTerminal.Letter]),
... ]
>>> my_oracle_table = oracle_table(flip(cur2(isinstance))(SimpleTerminal), grammar)
>>> is_tok = p_instance(SimpleTerminal)
>>> is_var = p_instance(SimpleVariable)
>>> my_oracle_table = oracle_table(is_tok, is_var, grammar)
>>> rendered_oracle_table = print_oracle_table_enum(my_oracle_table)
>>> print(rendered_oracle_table) #doctest: +NORMALIZE_WHITESPACE
{