Add ability to build parse tree
This commit is contained in:
parent
140fe67d5d
commit
61ee996c7a
94
parse.py
94
parse.py
|
@ -1,14 +1,31 @@
|
|||
from emis_funky_funktions import *
|
||||
|
||||
from dataclasses import dataclass
|
||||
from functools import wraps
|
||||
from operator import contains
|
||||
from typing import Callable, Collection, Mapping, TypeGuard
|
||||
|
||||
@dataclass(frozen=True)
class Action(Generic[A]):
    """
    A callable wrapper marking a function as a semantic action.

    Wrapping the function in its own class lets it sit in the same sequence
    as grammar symbols and still be told apart from them by type (for
    example with a `case Action(f)` pattern).  Calling the instance simply
    calls the wrapped function.
    """
    # The wrapped function: maps one sequence of values to its replacement
    f: Callable[[Sequence[A]], Sequence[A]]

    def __call__(self, i: Sequence[A]) -> Sequence[A]:
        """
        Apply the wrapped function to `i` and return its result
        """
        return self.f(i)
|
||||
|
||||
def _expected(row: Mapping[B, Collection[Sequence[Any]]]) -> Collection[B]:
|
||||
"""
|
||||
Given a single row from an oracle table, identify the expected terminals
|
||||
"""
|
||||
return [
|
||||
terminal
|
||||
for terminal, expansions in row.items()
|
||||
if len(expansions)
|
||||
]
|
||||
|
||||
def parser(
|
||||
oracle: Callable[[A, B], Collection[Sequence[A | B]]],
|
||||
identify_lexeme: Callable[[C], B],
|
||||
oracle: Mapping[A, Mapping[B, Collection[Sequence[A | B | Action[C | D]]]]],
|
||||
identify_lexeme: Callable[[D], B],
|
||||
start_symbol: A,
|
||||
) -> Callable[[Sequence[C]], bool]:
|
||||
) -> Callable[[Sequence[D]], Result[Sequence[C | D], Tuple[D, Collection[B]]]]:
|
||||
"""
|
||||
Produces a parser based on a grammar, an oracle, and a start symbol.
|
||||
|
||||
|
@ -24,66 +41,85 @@ def parser(
|
|||
... S = auto()
|
||||
... Sum = auto()
|
||||
... Sum_ = auto()
|
||||
... Term = auto()
|
||||
>>> class SimpleTerminal(IntEnum):
|
||||
... Number = auto()
|
||||
... Letter = auto()
|
||||
... Plus = auto()
|
||||
... Eof = auto()
|
||||
... def __repr__(self):
|
||||
... return self.name
|
||||
>>> build_S = Action(lambda x: x[:-1])
|
||||
>>> build_Sum = Action(lambda x: (*x[:-2], x[-1](int(x[-2][1]))))
|
||||
>>> build_Sum_1 = Action(lambda x: (*x[:-2], lambda y: x[-1] + y))
|
||||
>>> build_Sum_2 = Action(lambda x: (*x, lambda y: y))
|
||||
>>> grammar = [
|
||||
... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof]),
|
||||
... (SimpleVariable.Sum, [SimpleVariable.Term, SimpleVariable.Sum_]),
|
||||
... (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum]),
|
||||
... (SimpleVariable.Sum_, []),
|
||||
... (SimpleVariable.Term, [SimpleTerminal.Number]),
|
||||
... (SimpleVariable.Term, [SimpleTerminal.Letter]),
|
||||
... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]),
|
||||
... (SimpleVariable.Sum, [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]),
|
||||
... (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum, build_Sum_1]),
|
||||
... (SimpleVariable.Sum_, [build_Sum_2]),
|
||||
... ]
|
||||
>>> my_oracle_table = oracle(flip(cur2(isinstance))(SimpleTerminal), grammar)
|
||||
>>> is_term = p_instance(SimpleTerminal)
|
||||
>>> is_var = p_instance(SimpleVariable)
|
||||
>>> my_oracle_table = oracle_table(is_term, is_var, grammar)
|
||||
>>> my_parser = parser(my_oracle_table, lambda x: x[0], SimpleVariable.S)
|
||||
|
||||
>>> my_parser([
|
||||
... (SimpleTerminal.Number, 1),
|
||||
... (SimpleTerminal.Plus,),
|
||||
... (SimpleTerminal.Letter, 'x'),
|
||||
... (SimpleTerminal.Number, 3),
|
||||
... (SimpleTerminal.Plus,),
|
||||
... (SimpleTerminal.Number, 10),
|
||||
... (SimpleTerminal.Eof,),
|
||||
... ])
|
||||
True
|
||||
Ok((14,))
|
||||
|
||||
>>> my_parser([
|
||||
... (SimpleTerminal.Number, 1),
|
||||
... (SimpleTerminal.Plus,),
|
||||
... (SimpleTerminal.Letter, 'x'),
|
||||
... (SimpleTerminal.Number, 3),
|
||||
... (SimpleTerminal.Number, 10), # <--- this is invalid!
|
||||
... (SimpleTerminal.Eof,),
|
||||
... ])
|
||||
False
|
||||
Err(((Number, 10), [Plus, Eof]))
|
||||
"""
|
||||
is_variable: Callable[[A | B], TypeGuard[A]] = flip(cur2(isinstance))(start_symbol.__class__) #type: ignore
|
||||
@cur2
|
||||
def inner(stack: Sequence[A | B], lexemes: Sequence[C]) -> bool:
|
||||
is_var: Callable[[Any], TypeGuard[A]] = p_instance(start_symbol.__class__)
|
||||
is_tok: Callable[[Any], TypeGuard[B]] = p_instance(next(iter(oracle[start_symbol].keys())).__class__)
|
||||
def inner(
|
||||
stack: Sequence[A | B | Action[C | D]],
|
||||
ast_stack: Sequence[C | D],
|
||||
lexemes: Sequence[D],
|
||||
) -> Result[Sequence[C | D], Tuple[D, Collection[B]]]:
|
||||
match stack:
|
||||
case [top_of_stack, *popped_stack] if is_variable(top_of_stack):
|
||||
expansions = oracle(top_of_stack, identify_lexeme(lexemes[0]))
|
||||
# Action
|
||||
case [Action(f), *popped_stack]:
|
||||
return inner(popped_stack, f(ast_stack), lexemes)
|
||||
# A [Variable]
|
||||
case [top_of_stack, *popped_stack] if is_var(top_of_stack):
|
||||
expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
|
||||
match expansions:
|
||||
case []:
|
||||
return False
|
||||
return Err((lexemes[0], _expected(oracle[top_of_stack])))
|
||||
case [expansion]:
|
||||
return inner((*expansion, *popped_stack))(lexemes)
|
||||
return inner((*expansion, *popped_stack), ast_stack, lexemes)
|
||||
case _:
|
||||
raise Exception('Not an LL(1) grammar!!!')
|
||||
# B [Token] (match)
|
||||
case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]):
|
||||
return inner(stack[1:])(lexemes[1:])
|
||||
return inner(popped_stack, (*ast_stack, lexemes[0]), lexemes[1:])
|
||||
# B [Token] (no match)
|
||||
case [top_of_stack, *popped_stack]:
|
||||
assert is_tok(top_of_stack)
|
||||
return Err((lexemes[0], (top_of_stack,)))
|
||||
# Empty stack (finished parsing)
|
||||
case []:
|
||||
return len(lexemes) == 0
|
||||
case _:
|
||||
return False
|
||||
if len(lexemes):
|
||||
return Err((lexemes[0], []))
|
||||
else:
|
||||
return Ok(ast_stack)
|
||||
raise Exception('Unreachable!')
|
||||
return wraps(parser)(inner([start_symbol]))
|
||||
return wraps(parser)(p(inner, [start_symbol], []))
|
||||
|
||||
if __name__ == '__main__':
    # Running this module directly executes its doctests.  The names
    # imported here become module globals, which is where doctest looks up
    # the names used by the examples (`auto`, `IntEnum`, `oracle_table`).
    import doctest
    from enum import auto, IntEnum
    from build_oracle import oracle_table
    doctest.testmod()
|
Loading…
Reference in a new issue