Added a basic recognizer
This commit is contained in:
parent
5e84e50aa2
commit
6e1952b148
89
parse.py
Normal file
89
parse.py
Normal file
|
@ -0,0 +1,89 @@
|
|||
from emis_funky_funktions import *
|
||||
|
||||
from functools import wraps
|
||||
from operator import contains
|
||||
from typing import Callable, Collection, Mapping, TypeGuard
|
||||
|
||||
def parser(
    oracle: Callable[[A, B], Collection[Sequence[A | B]]],
    identify_lexeme: Callable[[C], B],
    start_symbol: A,
) -> Callable[[Sequence[C]], bool]:
    """
    Produces an LL(1) recognizer from an oracle, a lexeme classifier, and a start symbol.

    The returned function takes a sequence of lexemes and returns `True` if the whole
    sequence can be derived from `start_symbol`, and `False` otherwise.

    `oracle(variable, token)` must return the collection of expansions which may be used
    to rewrite `variable` when `token` is the next token of the input.  An empty
    collection rejects the input, and exactly one expansion is applied.  Any collection
    with a length works (list, tuple, set, ...).

    The `identify_lexeme` argument should be a function which converts a lexeme into the
    token that it represents.  This allows for the actual lexemes that are being fed in to
    be more complex, and store additional data.

    A symbol on the parse stack is treated as a variable when it is an instance of the
    class of `start_symbol`; every other symbol is treated as a terminal and compared
    with the next token using `==`.

    The oracle is only ever consulted with a real lookahead token: if the input runs out
    while symbols are still waiting on the stack, the input is rejected.

    Raises `Exception('Not an LL(1) grammar!!!')` if the oracle offers more than one
    expansion for some (variable, token) pair that is reached while recognizing.

    ### Example:

    We generate a simple grammar:

    >>> class SimpleVariable(IntEnum):
    ...     S = auto()
    ...     Sum = auto()
    ...     Sum_ = auto()
    ...     Term = auto()
    >>> class SimpleTerminal(IntEnum):
    ...     Number = auto()
    ...     Letter = auto()
    ...     Plus = auto()
    ...     Eof = auto()
    >>> grammar = [
    ...     (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof]),
    ...     (SimpleVariable.Sum, [SimpleVariable.Term, SimpleVariable.Sum_]),
    ...     (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum]),
    ...     (SimpleVariable.Sum_, []),
    ...     (SimpleVariable.Term, [SimpleTerminal.Number]),
    ...     (SimpleVariable.Term, [SimpleTerminal.Letter]),
    ... ]
    >>> my_oracle_table = oracle(flip(cur2(isinstance))(SimpleTerminal), grammar)
    >>> my_parser = parser(my_oracle_table, lambda x: x[0], SimpleVariable.S)

    >>> my_parser([
    ...     (SimpleTerminal.Number, 1),
    ...     (SimpleTerminal.Plus,),
    ...     (SimpleTerminal.Letter, 'x'),
    ...     (SimpleTerminal.Plus,),
    ...     (SimpleTerminal.Number, 10),
    ...     (SimpleTerminal.Eof,),
    ... ])
    True

    >>> my_parser([
    ...     (SimpleTerminal.Number, 1),
    ...     (SimpleTerminal.Plus,),
    ...     (SimpleTerminal.Letter, 'x'),
    ...     (SimpleTerminal.Number, 10), # <--- this is invalid!
    ...     (SimpleTerminal.Eof,),
    ... ])
    False

    Input which stops early is rejected rather than causing an error:

    >>> my_parser([
    ...     (SimpleTerminal.Number, 1),
    ... ])
    False
    """
    variable_class = type(start_symbol)

    def recognize(lexemes: Sequence[C]) -> bool:
        # The top of the parse stack is the END of this list, so expansions are pushed
        # in reverse to make their first symbol the next one popped.
        stack: list[A | B] = [start_symbol]
        # Index of the next unconsumed lexeme.  A cursor avoids re-slicing the input
        # after every consumed token.
        position = 0
        total = len(lexemes)

        while stack:
            # Both kinds of stack symbol need a lookahead token, so check for exhausted
            # input before peeking instead of failing with an IndexError.
            if position >= total:
                return False
            token = identify_lexeme(lexemes[position])
            top_of_stack = stack.pop()

            if isinstance(top_of_stack, variable_class):
                expansions = oracle(top_of_stack, token)
                # Dispatch on len() rather than on sequence patterns so that any
                # Collection (including sets) is handled.
                if len(expansions) == 0:
                    return False
                if len(expansions) > 1:
                    raise Exception('Not an LL(1) grammar!!!')
                (expansion,) = expansions
                stack.extend(reversed(expansion))
            elif top_of_stack == token:
                # Terminal matches the lookahead: consume it.
                position += 1
            else:
                return False

        # The stack is empty: accept only if every lexeme was consumed as well.
        return position == total

    # Carry `parser`'s name and docstring over to the returned recognizer.
    return wraps(parser)(recognize)
|
||||
|
||||
if __name__ == '__main__':
    # Running this file directly executes the doctests embedded in its docstrings.
    # Those examples use `IntEnum`, `auto` and `oracle` without importing them, so the
    # names are brought into the module namespace here before the examples run.
    from enum import IntEnum, auto
    from build_oracle import oracle
    import doctest

    doctest.testmod()
|
Loading…
Reference in a new issue