Added a basic recognizer

This commit is contained in:
Emi Simpson 2023-03-04 13:31:44 -05:00
parent 5e84e50aa2
commit 6e1952b148
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847

89
parse.py Normal file
View file

@ -0,0 +1,89 @@
from emis_funky_funktions import *
from functools import wraps
from operator import contains
from typing import Callable, Collection, Mapping, TypeGuard
def parser(
oracle: Callable[[A, B], Collection[Sequence[A | B]]],
identify_lexeme: Callable[[C], B],
start_symbol: A,
) -> Callable[[Sequence[C]], bool]:
"""
Produces a parser based on a grammar, an oracle, and a start symbol.
The `identify_lexeme` argument should be a function which converts a lexeme into the
token that it represents. This allows for the actual lexemes that are being fed in to
be more complex, and store additional data.
### Example:
We generate a simple grammar:
>>> class SimpleVariable(IntEnum):
... S = auto()
... Sum = auto()
... Sum_ = auto()
... Term = auto()
>>> class SimpleTerminal(IntEnum):
... Number = auto()
... Letter = auto()
... Plus = auto()
... Eof = auto()
>>> grammar = [
... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof]),
... (SimpleVariable.Sum, [SimpleVariable.Term, SimpleVariable.Sum_]),
... (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum]),
... (SimpleVariable.Sum_, []),
... (SimpleVariable.Term, [SimpleTerminal.Number]),
... (SimpleVariable.Term, [SimpleTerminal.Letter]),
... ]
>>> my_oracle_table = oracle(flip(cur2(isinstance))(SimpleTerminal), grammar)
>>> my_parser = parser(my_oracle_table, lambda x: x[0], SimpleVariable.S)
>>> my_parser([
... (SimpleTerminal.Number, 1),
... (SimpleTerminal.Plus,),
... (SimpleTerminal.Letter, 'x'),
... (SimpleTerminal.Plus,),
... (SimpleTerminal.Number, 10),
... (SimpleTerminal.Eof,),
... ])
True
>>> my_parser([
... (SimpleTerminal.Number, 1),
... (SimpleTerminal.Plus,),
... (SimpleTerminal.Letter, 'x'),
... (SimpleTerminal.Number, 10), # <--- this is invalid!
... (SimpleTerminal.Eof,),
... ])
False
"""
is_variable: Callable[[A | B], TypeGuard[A]] = flip(cur2(isinstance))(start_symbol.__class__) #type: ignore
@cur2
def inner(stack: Sequence[A | B], lexemes: Sequence[C]) -> bool:
match stack:
case [top_of_stack, *popped_stack] if is_variable(top_of_stack):
expansions = oracle(top_of_stack, identify_lexeme(lexemes[0]))
match expansions:
case []:
return False
case [expansion]:
return inner((*expansion, *popped_stack))(lexemes)
case _:
raise Exception('Not an LL(1) grammar!!!')
case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]):
return inner(stack[1:])(lexemes[1:])
case []:
return len(lexemes) == 0
case _:
return False
raise Exception('Unreachable!')
return wraps(parser)(inner([start_symbol]))
if __name__ == '__main__':
    # Running this file as a script executes the doctests embedded in the
    # docstrings above.  The doctest examples reference `IntEnum`, `auto` and
    # `oracle`, so those names are imported into the module namespace here,
    # before the tests are run.
    from enum import IntEnum, auto
    from build_oracle import oracle

    import doctest
    doctest.testmod()