diff --git a/parse.py b/parse.py index 2e800ed..02a53ed 100644 --- a/parse.py +++ b/parse.py @@ -1,14 +1,31 @@ from emis_funky_funktions import * +from dataclasses import dataclass from functools import wraps from operator import contains from typing import Callable, Collection, Mapping, TypeGuard +@dataclass(frozen=True) +class Action(Generic[A]): + f: Callable[[Sequence[A]], Sequence[A]] + def __call__(self, i: Sequence[A]) -> Sequence[A]: + return self.f(i) + +def _expected(row: Mapping[B, Collection[Sequence[Any]]]) -> Collection[B]: + """ + Given a single row from an oracle table, identify the expected terminals + """ + return [ + terminal + for terminal, expansions in row.items() + if len(expansions) + ] + def parser( - oracle: Callable[[A, B], Collection[Sequence[A | B]]], - identify_lexeme: Callable[[C], B], + oracle: Mapping[A, Mapping[B, Collection[Sequence[A | B | Action[C | D]]]]], + identify_lexeme: Callable[[D], B], start_symbol: A, -) -> Callable[[Sequence[C]], bool]: +) -> Callable[[Sequence[D]], Result[Sequence[C | D], Tuple[D, Collection[B]]]]: """ Produces a parser based on a grammar, an oracle, and a start symbol. @@ -24,66 +41,85 @@ def parser( ... S = auto() ... Sum = auto() ... Sum_ = auto() - ... Term = auto() >>> class SimpleTerminal(IntEnum): ... Number = auto() - ... Letter = auto() ... Plus = auto() ... Eof = auto() + ... def __repr__(self): + ... return self.name + >>> build_S = Action(lambda x: x[:-1]) + >>> build_Sum = Action(lambda x: (*x[:-2], x[-1](int(x[-2][1])))) + >>> build_Sum_1 = Action(lambda x: (*x[:-2], lambda y: x[-1] + y)) + >>> build_Sum_2 = Action(lambda x: (*x, lambda y: y)) >>> grammar = [ - ... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof]), - ... (SimpleVariable.Sum, [SimpleVariable.Term, SimpleVariable.Sum_]), - ... (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum]), - ... (SimpleVariable.Sum_, []), - ... (SimpleVariable.Term, [SimpleTerminal.Number]), - ... (SimpleVariable.Term, [SimpleTerminal.Letter]), + ... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]), + ... (SimpleVariable.Sum, [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]), + ... (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum, build_Sum_1]), + ... (SimpleVariable.Sum_, [build_Sum_2]), ... ] - >>> my_oracle_table = oracle(flip(cur2(isinstance))(SimpleTerminal), grammar) + >>> is_term = p_instance(SimpleTerminal) + >>> is_var = p_instance(SimpleVariable) + >>> my_oracle_table = oracle_table(is_term, is_var, grammar) >>> my_parser = parser(my_oracle_table, lambda x: x[0], SimpleVariable.S) >>> my_parser([ ... (SimpleTerminal.Number, 1), ... (SimpleTerminal.Plus,), - ... (SimpleTerminal.Letter, 'x'), + ... (SimpleTerminal.Number, 3), ... (SimpleTerminal.Plus,), ... (SimpleTerminal.Number, 10), ... (SimpleTerminal.Eof,), ... ]) - True + Ok((14,)) >>> my_parser([ ... (SimpleTerminal.Number, 1), ... (SimpleTerminal.Plus,), - ... (SimpleTerminal.Letter, 'x'), + ... (SimpleTerminal.Number, 3), ... (SimpleTerminal.Number, 10), # <--- this is invalid! ... (SimpleTerminal.Eof,), ... ]) - False + Err(((Number, 10), [Plus, Eof])) """ - is_variable: Callable[[A | B], TypeGuard[A]] = flip(cur2(isinstance))(start_symbol.__class__) #type: ignore - @cur2 - def inner(stack: Sequence[A | B], lexemes: Sequence[C]) -> bool: + is_var: Callable[[Any], TypeGuard[A]] = p_instance(start_symbol.__class__) + is_tok: Callable[[Any], TypeGuard[B]] = p_instance(next(iter(oracle[start_symbol].keys())).__class__) + def inner( + stack: Sequence[A | B | Action[C | D]], + ast_stack: Sequence[C | D], + lexemes: Sequence[D], + ) -> Result[Sequence[C | D], Tuple[D, Collection[B]]]: match stack: - case [top_of_stack, *popped_stack] if is_variable(top_of_stack): - expansions = oracle(top_of_stack, identify_lexeme(lexemes[0])) + # Action + case [Action(f), *popped_stack]: + return inner(popped_stack, f(ast_stack), lexemes) + # A [Variable] + case [top_of_stack, *popped_stack] if is_var(top_of_stack): + expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])] match expansions: case []: - return False + return Err((lexemes[0], _expected(oracle[top_of_stack]))) case [expansion]: - return inner((*expansion, *popped_stack))(lexemes) + return inner((*expansion, *popped_stack), ast_stack, lexemes) case _: raise Exception('Not an LL(1) grammar!!!') + # B [Token] (match) case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]): - return inner(stack[1:])(lexemes[1:]) + return inner(popped_stack, (*ast_stack, lexemes[0]), lexemes[1:]) + # B [Token] (no match) + case [top_of_stack, *popped_stack]: + assert is_tok(top_of_stack) + return Err((lexemes[0], (top_of_stack,))) + # Empty stack (finished parsing) case []: - return len(lexemes) == 0 - case _: - return False + if len(lexemes): + return Err((lexemes[0], [])) + else: + return Ok(ast_stack) raise Exception('Unreachable!') - return wraps(parser)(inner([start_symbol])) + return wraps(parser)(p(inner, [start_symbol], [])) if __name__ == '__main__': import doctest from enum import auto, IntEnum - from build_oracle import oracle + from build_oracle import oracle_table doctest.testmod() \ No newline at end of file