Add ability to build parse tree

This commit is contained in:
Emi Simpson 2023-03-04 16:37:55 -05:00
parent 140fe67d5d
commit 61ee996c7a
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847

View file

@ -1,14 +1,31 @@
from emis_funky_funktions import *
from dataclasses import dataclass
from functools import wraps
from operator import contains
from typing import Callable, Collection, Mapping, TypeGuard
@dataclass(frozen=True)
class Action(Generic[A]):
    """
    An immutable, callable wrapper around a sequence-rewriting function.

    Calling an instance passes the given sequence to the wrapped function `f`
    and returns whatever `f` returns.
    """

    # The wrapped function.  It is the only field, so the dataclass-generated
    # `__match_args__` lets a positional class pattern such as `Action(f)`
    # bind it directly.
    f: Callable[[Sequence[A]], Sequence[A]]

    def __call__(self, i: Sequence[A]) -> Sequence[A]:
        """
        Apply the wrapped function to `i` and return its result unchanged.
        """
        wrapped = self.f
        return wrapped(i)
def _expected(row: Mapping[B, Collection[Sequence[Any]]]) -> Collection[B]:
"""
Given a single row from an oracle table, identify the expected terminals
"""
return [
terminal
for terminal, expansions in row.items()
if len(expansions)
]
def parser(
oracle: Callable[[A, B], Collection[Sequence[A | B]]],
identify_lexeme: Callable[[C], B],
oracle: Mapping[A, Mapping[B, Collection[Sequence[A | B | Action[C | D]]]]],
identify_lexeme: Callable[[D], B],
start_symbol: A,
) -> Callable[[Sequence[C]], bool]:
) -> Callable[[Sequence[D]], Result[Sequence[C | D], Tuple[D, Collection[B]]]]:
"""
Produces a parser based on a grammar, an oracle, and a start symbol.
@ -24,66 +41,85 @@ def parser(
... S = auto()
... Sum = auto()
... Sum_ = auto()
... Term = auto()
>>> class SimpleTerminal(IntEnum):
... Number = auto()
... Letter = auto()
... Plus = auto()
... Eof = auto()
... def __repr__(self):
... return self.name
>>> build_S = Action(lambda x: x[:-1])
>>> build_Sum = Action(lambda x: (*x[:-2], x[-1](int(x[-2][1]))))
>>> build_Sum_1 = Action(lambda x: (*x[:-2], lambda y: x[-1] + y))
>>> build_Sum_2 = Action(lambda x: (*x, lambda y: y))
>>> grammar = [
... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof]),
... (SimpleVariable.Sum, [SimpleVariable.Term, SimpleVariable.Sum_]),
... (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum]),
... (SimpleVariable.Sum_, []),
... (SimpleVariable.Term, [SimpleTerminal.Number]),
... (SimpleVariable.Term, [SimpleTerminal.Letter]),
... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]),
... (SimpleVariable.Sum, [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]),
... (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum, build_Sum_1]),
... (SimpleVariable.Sum_, [build_Sum_2]),
... ]
>>> my_oracle_table = oracle(flip(cur2(isinstance))(SimpleTerminal), grammar)
>>> is_term = p_instance(SimpleTerminal)
>>> is_var = p_instance(SimpleVariable)
>>> my_oracle_table = oracle_table(is_term, is_var, grammar)
>>> my_parser = parser(my_oracle_table, lambda x: x[0], SimpleVariable.S)
>>> my_parser([
... (SimpleTerminal.Number, 1),
... (SimpleTerminal.Plus,),
... (SimpleTerminal.Letter, 'x'),
... (SimpleTerminal.Number, 3),
... (SimpleTerminal.Plus,),
... (SimpleTerminal.Number, 10),
... (SimpleTerminal.Eof,),
... ])
True
Ok((14,))
>>> my_parser([
... (SimpleTerminal.Number, 1),
... (SimpleTerminal.Plus,),
... (SimpleTerminal.Letter, 'x'),
... (SimpleTerminal.Number, 3),
... (SimpleTerminal.Number, 10), # <--- this is invalid!
... (SimpleTerminal.Eof,),
... ])
False
Err(((Number, 10), [Plus, Eof]))
"""
is_variable: Callable[[A | B], TypeGuard[A]] = flip(cur2(isinstance))(start_symbol.__class__) #type: ignore
@cur2
def inner(stack: Sequence[A | B], lexemes: Sequence[C]) -> bool:
is_var: Callable[[Any], TypeGuard[A]] = p_instance(start_symbol.__class__)
is_tok: Callable[[Any], TypeGuard[B]] = p_instance(next(iter(oracle[start_symbol].keys())).__class__)
def inner(
stack: Sequence[A | B | Action[C | D]],
ast_stack: Sequence[C | D],
lexemes: Sequence[D],
) -> Result[Sequence[C | D], Tuple[D, Collection[B]]]:
match stack:
case [top_of_stack, *popped_stack] if is_variable(top_of_stack):
expansions = oracle(top_of_stack, identify_lexeme(lexemes[0]))
# Action
case [Action(f), *popped_stack]:
return inner(popped_stack, f(ast_stack), lexemes)
# A [Variable]
case [top_of_stack, *popped_stack] if is_var(top_of_stack):
expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
match expansions:
case []:
return False
return Err((lexemes[0], _expected(oracle[top_of_stack])))
case [expansion]:
return inner((*expansion, *popped_stack))(lexemes)
return inner((*expansion, *popped_stack), ast_stack, lexemes)
case _:
raise Exception('Not an LL(1) grammar!!!')
# B [Token] (match)
case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]):
return inner(stack[1:])(lexemes[1:])
return inner(popped_stack, (*ast_stack, lexemes[0]), lexemes[1:])
# B [Token] (no match)
case [top_of_stack, *popped_stack]:
assert is_tok(top_of_stack)
return Err((lexemes[0], (top_of_stack,)))
# Empty stack (finished parsing)
case []:
return len(lexemes) == 0
case _:
return False
if len(lexemes):
return Err((lexemes[0], []))
else:
return Ok(ast_stack)
raise Exception('Unreachable!')
return wraps(parser)(inner([start_symbol]))
return wraps(parser)(p(inner, [start_symbol], []))
# Script entry point: when this module is executed directly, run the doctests
# embedded in its docstrings.
if __name__ == '__main__':
import doctest
# These names are imported only so the docstring examples can use them:
# doctest.testmod() runs examples against this module's globals.
from enum import auto, IntEnum
# NOTE(review): both `oracle` and `oracle_table` appear here because this text
# is a rendered diff (removed line followed by its replacement); presumably
# only the `oracle_table` import remains in the committed file -- confirm.
from build_oracle import oracle
from build_oracle import oracle_table
doctest.testmod()