89 lines
3.2 KiB
Python
89 lines
3.2 KiB
Python
|
from emis_funky_funktions import *
|
||
|
|
||
|
from functools import wraps
|
||
|
from operator import contains
|
||
|
from typing import Callable, Collection, Mapping, TypeGuard
|
||
|
|
||
|
def parser(
|
||
|
oracle: Callable[[A, B], Collection[Sequence[A | B]]],
|
||
|
identify_lexeme: Callable[[C], B],
|
||
|
start_symbol: A,
|
||
|
) -> Callable[[Sequence[C]], bool]:
|
||
|
"""
|
||
|
Produces a parser based on a grammar, an oracle, and a start symbol.
|
||
|
|
||
|
The `identify_lexeme` argument should be a function which converts a lexeme into the
|
||
|
token that it represents. This allows for the actual lexemes that are being fed in to
|
||
|
be more complex, and store additional data.
|
||
|
|
||
|
### Example:
|
||
|
|
||
|
We generate a simple grammar:
|
||
|
|
||
|
>>> class SimpleVariable(IntEnum):
|
||
|
... S = auto()
|
||
|
... Sum = auto()
|
||
|
... Sum_ = auto()
|
||
|
... Term = auto()
|
||
|
>>> class SimpleTerminal(IntEnum):
|
||
|
... Number = auto()
|
||
|
... Letter = auto()
|
||
|
... Plus = auto()
|
||
|
... Eof = auto()
|
||
|
>>> grammar = [
|
||
|
... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof]),
|
||
|
... (SimpleVariable.Sum, [SimpleVariable.Term, SimpleVariable.Sum_]),
|
||
|
... (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum]),
|
||
|
... (SimpleVariable.Sum_, []),
|
||
|
... (SimpleVariable.Term, [SimpleTerminal.Number]),
|
||
|
... (SimpleVariable.Term, [SimpleTerminal.Letter]),
|
||
|
... ]
|
||
|
>>> my_oracle_table = oracle(flip(cur2(isinstance))(SimpleTerminal), grammar)
|
||
|
>>> my_parser = parser(my_oracle_table, lambda x: x[0], SimpleVariable.S)
|
||
|
|
||
|
>>> my_parser([
|
||
|
... (SimpleTerminal.Number, 1),
|
||
|
... (SimpleTerminal.Plus,),
|
||
|
... (SimpleTerminal.Letter, 'x'),
|
||
|
... (SimpleTerminal.Plus,),
|
||
|
... (SimpleTerminal.Number, 10),
|
||
|
... (SimpleTerminal.Eof,),
|
||
|
... ])
|
||
|
True
|
||
|
|
||
|
>>> my_parser([
|
||
|
... (SimpleTerminal.Number, 1),
|
||
|
... (SimpleTerminal.Plus,),
|
||
|
... (SimpleTerminal.Letter, 'x'),
|
||
|
... (SimpleTerminal.Number, 10), # <--- this is invalid!
|
||
|
... (SimpleTerminal.Eof,),
|
||
|
... ])
|
||
|
False
|
||
|
"""
|
||
|
is_variable: Callable[[A | B], TypeGuard[A]] = flip(cur2(isinstance))(start_symbol.__class__) #type: ignore
|
||
|
@cur2
|
||
|
def inner(stack: Sequence[A | B], lexemes: Sequence[C]) -> bool:
|
||
|
match stack:
|
||
|
case [top_of_stack, *popped_stack] if is_variable(top_of_stack):
|
||
|
expansions = oracle(top_of_stack, identify_lexeme(lexemes[0]))
|
||
|
match expansions:
|
||
|
case []:
|
||
|
return False
|
||
|
case [expansion]:
|
||
|
return inner((*expansion, *popped_stack))(lexemes)
|
||
|
case _:
|
||
|
raise Exception('Not an LL(1) grammar!!!')
|
||
|
case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]):
|
||
|
return inner(stack[1:])(lexemes[1:])
|
||
|
case []:
|
||
|
return len(lexemes) == 0
|
||
|
case _:
|
||
|
return False
|
||
|
raise Exception('Unreachable!')
|
||
|
return wraps(parser)(inner([start_symbol]))
|
||
|
|
||
|
if __name__ == '__main__':
    # The doctest in `parser` refers to `IntEnum`, `auto` and `oracle`, none of which
    # are imported at the top of the module, so they are brought into the module's
    # globals here before the doctests are run.
    from build_oracle import oracle
    from enum import IntEnum, auto
    import doctest

    doctest.testmod()
|