from emis_funky_funktions import *

from functools import wraps
from operator import contains
from typing import Callable, Collection, Mapping, TypeGuard


def parser(
    oracle: Callable[[A, B], Collection[Sequence[A | B]]],
    identify_lexeme: Callable[[C], B],
    start_symbol: A,
) -> Callable[[Sequence[C]], bool]:
    """
    Produces a parser based on a grammar, an oracle, and a start symbol.

    The `identify_lexeme` argument should be a function which converts a lexeme into the
    token that it represents.  This allows for the actual lexemes that are being fed in to
    be more complex, and store additional data.

    The returned callable takes a sequence of lexemes and returns `True` if the whole
    sequence is derivable from `start_symbol`, and `False` otherwise.  A symbol is treated
    as a variable (non-terminal) when it is an instance of `start_symbol`'s class; every
    other symbol is treated as a terminal and compared against the identified lexeme.

    The oracle is always consulted with the next unread lexeme as lookahead, so input that
    runs out while symbols are still waiting on the stack is rejected (`False`).

    Raises a plain `Exception` if the oracle offers more than one expansion for a
    (variable, lookahead) pair, since the grammar is then not LL(1).

    ### Example:

    We generate a simple grammar:

    >>> class SimpleVariable(IntEnum):
    ...     S = auto()
    ...     Sum = auto()
    ...     Sum_ = auto()
    ...     Term = auto()

    >>> class SimpleTerminal(IntEnum):
    ...     Number = auto()
    ...     Letter = auto()
    ...     Plus = auto()
    ...     Eof = auto()

    >>> grammar = [
    ...     (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof]),
    ...     (SimpleVariable.Sum, [SimpleVariable.Term, SimpleVariable.Sum_]),
    ...     (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum]),
    ...     (SimpleVariable.Sum_, []),
    ...     (SimpleVariable.Term, [SimpleTerminal.Number]),
    ...     (SimpleVariable.Term, [SimpleTerminal.Letter]),
    ... ]

    >>> my_oracle_table = oracle(flip(cur2(isinstance))(SimpleTerminal), grammar)
    >>> my_parser = parser(my_oracle_table, lambda x: x[0], SimpleVariable.S)

    >>> my_parser([
    ...     (SimpleTerminal.Number, 1),
    ...     (SimpleTerminal.Plus,),
    ...     (SimpleTerminal.Letter, 'x'),
    ...     (SimpleTerminal.Plus,),
    ...     (SimpleTerminal.Number, 10),
    ...     (SimpleTerminal.Eof,),
    ... ])
    True

    >>> my_parser([
    ...     (SimpleTerminal.Number, 1),
    ...     (SimpleTerminal.Plus,),
    ...     (SimpleTerminal.Letter, 'x'),
    ...     (SimpleTerminal.Number, 10), # <--- this is invalid!
    ...     (SimpleTerminal.Eof,),
    ... ])
    False

    Input that stops before the grammar is satisfied is rejected instead of raising:

    >>> my_parser([(SimpleTerminal.Number, 1)])
    False

    >>> my_parser([])
    False
    """
    is_variable: Callable[[A | B], TypeGuard[A]] = flip(cur2(isinstance))(start_symbol.__class__) #type: ignore

    def parse(lexemes: Sequence[C]) -> bool:
        # Explicit stack + input cursor instead of recursion and slicing: parsing depth is
        # no longer bounded by the interpreter's recursion limit, and no copies of the
        # stack or the remaining input are made per step.
        # The top of the stack is the END of the list, so push and pop are O(1).
        stack: list[A | B] = [start_symbol]
        position = 0
        lexeme_count = len(lexemes)

        while stack:
            if position >= lexeme_count:
                # Symbols are still expected but there is no lookahead left to match them
                # against or to hand to the oracle.
                return False

            lookahead = identify_lexeme(lexemes[position])
            top_of_stack = stack.pop()

            if is_variable(top_of_stack):
                expansions = oracle(top_of_stack, lookahead)
                # Use len()/unpacking rather than sequence patterns so that any Collection
                # (including a set) returned by the oracle is handled as declared.
                expansion_count = len(expansions)
                if expansion_count == 0:
                    return False
                if expansion_count > 1:
                    raise Exception('Not an LL(1) grammar!!!')
                (expansion,) = expansions
                # Push right-to-left so the leftmost symbol of the expansion ends up on top.
                stack.extend(reversed(expansion))
            elif top_of_stack == lookahead:
                # Terminal matched: consume one lexeme.
                position += 1
            else:
                return False

        # Every expected symbol was matched; accept only if no input is left over.
        return position == lexeme_count

    # Keep the historical behaviour of copying `parser`'s metadata onto the returned callable.
    return wraps(parser)(parse)


if __name__ == '__main__':
    import doctest
    from enum import auto, IntEnum
    from build_oracle import oracle
    doctest.testmod()