199 lines
6.8 KiB
Python
199 lines
6.8 KiB
Python
"""
|
|
Tools for building an oracle table
|
|
|
|
See `grammar` and `build_oracle.sh` for scripts which actually produce python code. This
|
|
module only produces an oracle table in python, without outputting it.
|
|
"""
|
|
from emis_funky_funktions import *
|
|
|
|
from enum import auto, Enum, IntEnum
|
|
from functools import cache, reduce
|
|
from operator import getitem
|
|
from typing import Any, cast, Collection, Mapping, Sequence, Set, Tuple, TypeGuard, TypeVar
|
|
|
|
def _erase_actions_h(
|
|
handle: Sequence[A | B | C],
|
|
is_not_c: Callable[[A | B | C], TypeGuard[A | B]]
|
|
) -> Sequence[A | B]:
|
|
"""
|
|
Produce an identical handle, but with all the actions removed
|
|
"""
|
|
return [i for i in handle if is_not_c(i)]
|
|
|
|
def _erase_actions(
    grammar: Sequence[Tuple[A, Sequence[A | B | C]]],
    is_not_c: Callable[[A | B | C], TypeGuard[A | B]]
) -> Sequence[Tuple[A, Sequence[A | B]]]:
    """
    Strip every action out of every production of a grammar

    Productions keep their order and their left-hand sides.  Only the handles are rewritten:
    each one is passed through `_erase_actions_h` with the same `is_not_c` guard.
    """
    stripped = []
    for (lhs, rhs) in grammar:
        stripped.append((lhs, _erase_actions_h(rhs, is_not_c)))
    return stripped
|
|
|
|
def _first(
|
|
is_term: Callable[[A | B], TypeGuard[B]],
|
|
grammar: Sequence[Tuple[A, Sequence[A | B]]],
|
|
sequence: Sequence[A | B]
|
|
) -> Tuple[Collection[B], bool]:
|
|
"""
|
|
Computes all of the possible starting terminals for a handle in a given grammar
|
|
|
|
Due to pathetic python weaknesses, the first argument you must provide is a type guard
|
|
to determine whether a certain thing is a terminal as opposed to a variable.
|
|
|
|
Then, pass in the grammar and the sequence of terminals and variables in question.
|
|
|
|
The output contains two values. The first is a set of possible terminals, and the
|
|
second is a boolean indicating whether this term can derive epsilon.
|
|
|
|
>>> _first(flip(cur2(isinstance))(Tok), GRAMMAR, [Variable.Clause])
|
|
({Negate, Identifier}, False)
|
|
|
|
>>> _first(flip(cur2(isinstance))(Tok), GRAMMAR, [Variable.CSTerms])
|
|
({Comma}, True)
|
|
|
|
>>> _first(flip(cur2(isinstance))(Tok), GRAMMAR, [Variable.CSTerms, Tok.CloseP])
|
|
({CloseP, Comma}, False)
|
|
"""
|
|
def inner(vs: Sequence[A | B]) -> Tuple[Set[B], bool]:
|
|
match vs:
|
|
case []:
|
|
return (set(), True)
|
|
case [v, *rest] if is_term(v):
|
|
return ({v}, False)
|
|
case [v, *rest]:
|
|
this_variable_first, derives_epsilon = reduce(
|
|
lambda acc, result: (acc[0] | result[0], acc[1] or result[1]),
|
|
[
|
|
inner(handle)
|
|
for (other_variable, handle) in grammar
|
|
if other_variable == v
|
|
]
|
|
)
|
|
if derives_epsilon:
|
|
rest_first, rest_derives_epsilon = inner(rest)
|
|
return (rest_first | this_variable_first, rest_derives_epsilon)
|
|
else:
|
|
return (this_variable_first, False)
|
|
raise Exception("UNREACHABLE")
|
|
return inner(sequence)
|
|
|
|
def _follow(
    is_term: Callable[[A | B], TypeGuard[B]],
    grammar: Sequence[Tuple[A, Sequence[A | B]]],
) -> Mapping[A, Collection[B]]:
    """
    Build the follow table: for each variable, the terminals which may come right after it

    Every variable appearing on the left of a production starts out with an empty set.  The
    table is then repeatedly extended, one full pass over the grammar at a time, until a
    pass leaves it unchanged.

    >>> _follow(flip(cur2(isinstance))(Tok), GRAMMAR) #doctest: +NORMALIZE_WHITESPACE
    {<Start>: set(),
     <Idents>: {Newline},
     <Clauses>: {Eof},
     <Clauses_>: {Eof},
     <Clause>: {Newline, Eof},
     <Clause_>: {Newline, Eof},
     <Term>: {Newline, Negate, CloseP, Comma, Identifier, Eof},
     <Func>: {Newline, Negate, CloseP, Comma, Identifier, Eof},
     <CSTerms>: {CloseP}}
    """
    def tokens_after(tail: Sequence[A | B], follows_tail: Set[B]) -> Set[B]:
        # Whatever can start the tail may follow; if the tail can vanish entirely, so may
        # whatever follows the production the tail belongs to.
        tail_first, tail_derives_epsilon = _first(is_term, grammar, tail)
        return set(tail_first) | (follows_tail if tail_derives_epsilon else set())

    table: Mapping[A, Set[B]] = {
        variable: set()
        for (variable, _) in grammar
    }
    while True:
        # Every occurrence of a variable inside a handle contributes the tokens that can
        # come after that occurrence, judged against the table from the previous pass.
        contributions = [
            (
                cast(A, symbol),
                tokens_after(handle[position+1:], table[variable])
            )
            for (variable, handle) in grammar
            for (position, symbol) in enumerate(handle)
            if not is_term(symbol)
        ]
        grown = dict(table)
        for (target, tokens) in contributions:
            grown[target] = grown[target] | tokens
        if grown == table:
            # Fixed point reached: another pass could not add anything
            return grown
        table = grown
|
|
|
|
def _predict(
    is_term: Callable[[A | B], TypeGuard[B]],
    grammar: Sequence[Tuple[A, Sequence[A | B]]],
    follow: Mapping[A, Collection[B]],
    lhs: A,
    rhs: Sequence[A | B]
) -> Collection[B]:
    """
    Find the terminals under which the production `lhs -> rhs` may be chosen

    These are the terminals which can start `rhs`.  When `rhs` can derive epsilon, the
    terminals which can follow `lhs` (looked up in `follow`) are included as well.

    >>> is_tok = flip(cur2(isinstance))(Tok)
    >>> follow = _follow(is_tok, GRAMMAR)
    >>> _predict(is_tok, GRAMMAR, follow, Variable.Clause, [Variable.Term, Variable.Clause_])
    {Negate, Identifier}
    """
    starters, can_vanish = _first(is_term, grammar, rhs)
    if not can_vanish:
        return starters
    return set(follow[lhs]) | set(starters)
|
|
|
|
def oracle_table(
    is_term: Callable[[A | B], TypeGuard[B]],
    is_var: Callable[[A | B], TypeGuard[A]],
    grammar: Sequence[Tuple[A, Sequence[A | B]]],
) -> Mapping[A, Mapping[B, Collection[Sequence[A | B]]]]:
    """
    A variant of `_oracle` that generates a table immediately rather than lazily

    No significant performance benefit

    The table has one row for every variable on the left of a production, and each row has
    one entry for every terminal appearing in any handle.  An entry lists, in grammar
    order, the handles of that variable whose predict set contains that terminal.  Symbols
    which are neither terminals (`is_term`) nor variables (`is_var`) are ignored while
    computing predict sets, but are left in place in the handles stored in the table.

    >>> is_tok = p_instance(Tok)
    >>> is_var = p_instance(Variable)
    >>> my_oracle_table = oracle_table(is_tok, is_var, GRAMMAR)

    One valid expansion:
    >>> my_oracle_table[Variable.Clauses_][Tok.Negate]
    [[<Clause>, <Clauses>]]

    One valid expansion, but it expands to epsilon:
    >>> my_oracle_table[Variable.Clauses_][Tok.Eof]
    [[]]

    Zero valid expansions:
    >>> my_oracle_table[Variable.Term][Tok.Newline]
    []
    """
    all_variables = { lhs for (lhs, rhs) in grammar }
    all_terminals = { symbol for (lhs, rhs) in grammar for symbol in rhs if is_term(symbol) }

    is_not_c: Callable[[A | B | C], TypeGuard[A | B]] = lambda x: is_term(x) or is_var(x) #type:ignore
    e_grammar: Sequence[Tuple[A, Sequence[A | B]]] = _erase_actions(grammar, is_not_c) #type:ignore
    follow = _follow(is_term, e_grammar)

    # A production's predict set depends only on the production itself, so work it out once
    # per production rather than once per (variable, terminal) cell of the table.  With no
    # terminals there are no cells to fill, so nothing needs predicting at all.
    predictions = [
        (
            lhs,
            handle,
            _predict(is_term, e_grammar, follow, lhs, _erase_actions_h(handle, is_not_c)) #type:ignore
        )
        for (lhs, handle) in grammar
    ] if all_terminals else []

    return {
        v: {
            t: [
                handle
                for (lhs, handle, predicted) in predictions
                if lhs == v
                and t in predicted
            ]
            for t in all_terminals
        }
        for v in all_variables
    }
|
|
|
|
if __name__ == '__main__':
    # Running this module directly executes the doctests in the docstrings of this file.
    # Those examples refer to GRAMMAR, Tok and Variable, so the names are bound at module
    # scope here, where doctest.testmod() can see them as globals.
    from grammar import GRAMMAR, Tok, Variable
    import doctest
    doctest.testmod()