Reverse parser direction + doc

2023-03-04 19:31:30 -05:00 · 2023-03-04 19:31:30 -05:00 · f692e8dcca
parent 3afed0c2e0
commit f692e8dcca
1 changed files with 27 additions and 5 deletions
--- a/parse.py
+++ b/parse.py
@ -7,6 +7,11 @@ from typing import Callable, Collection, Mapping, TypeGuard
@dataclass(frozen=True)
 class Action(Generic[A]):
    """
    Denotes an action annotation.
    See `parser()`
    """
    f: Callable[[Sequence[A]], Sequence[A]]
    def __call__(self, i: Sequence[A]) -> Sequence[A]:
        return self.f(i)
@ -33,6 +38,23 @@ def parser(
    token that it represents.  This allows for the actual lexemes that are being fed in to
    be more complex, and store additional data.
    The oracle table may include "action" annotations in its sequences.  Actions should be
    an instance of `Action`, and should work on the AST stack.  Every matched lexeme is
    pushed to the AST stack.  An action may transform this stack by popping some number of
    items off of it, constructing some AST, pushing that AST back to the stack, and then
    returning the modified stack.
    A couple things to note about this process:
        - The stack that is passed to each action is immutable.  "Modifications" should be
          made by simply constructing and returning a new stack.
        - The top of the stack is the zero index (new items are pushed to the front).
    If a parse is successful, the return value will be the AST stack at the end of the
    parse.  It is up to the caller to verify that this is an expected result.
    If a parse fails, the return value will be a tuple containing the erroneous lexeme and
    a collection of expected tokens which failed to match it.
    ### Example:
    We generate a simple grammar:
@ -47,10 +69,10 @@ def parser(
    ...     Eof = auto()
    ...     def __repr__(self):
    ...         return self.name
-    >>> build_S = Action(lambda x: x[:-1])
+    >>> build_S = Action(lambda x: x[1:])
-    >>> build_Sum = Action(lambda x: (*x[:-2], x[-1](int(x[-2][1]))))
+    >>> build_Sum = Action(lambda x: (x[0](x[1][1]), *x[2:]))
-    >>> build_Sum_1 = Action(lambda x: (*x[:-2], lambda y: x[-1] + y))
+    >>> build_Sum_1 = Action(lambda x: (lambda y: x[0] + y, *x[2:]))
-    >>> build_Sum_2 = Action(lambda x: (*x, lambda y: y))
+    >>> build_Sum_2 = Action(lambda x: (lambda y: y, *x))
    >>> grammar = [
    ...     (SimpleVariable.S,    [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]),
    ...     (SimpleVariable.Sum,  [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]),
@ -104,7 +126,7 @@ def parser(
                        raise Exception('Not an LL(1) grammar!!!')
            # B [Token] (match)
            case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]):
-                return inner(popped_stack, (*ast_stack, lexemes[0]), lexemes[1:])
+                return inner(popped_stack, (lexemes[0], *ast_stack), lexemes[1:])
            # B [Token] (no match)
            case [top_of_stack, *popped_stack]:
                assert is_tok(top_of_stack)