Reverse parser direction + doc

2023-03-04 19:31:30 -05:00 · 2023-03-04 19:31:30 -05:00 · f692e8dcca
parent 3afed0c2e0
commit f692e8dcca
1 changed files with 27 additions and 5 deletions
--- a/parse.py
+++ b/parse.py
@ -7,6 +7,11 @@ from typing import Callable, Collection, Mapping, TypeGuard

@dataclass(frozen=True)
 class Action(Generic[A]):
+    """
+    Denotes an action annotation.
+
+    See `parser()`
+    """
    f: Callable[[Sequence[A]], Sequence[A]]
    def __call__(self, i: Sequence[A]) -> Sequence[A]:
        return self.f(i)
@ -33,6 +38,23 @@ def parser(
    token that it represents.  This allows for the actual lexemes that are being fed in to
    be more complex, and store additional data.

+    The oracle table may include "action" annotations in its sequences.  Actions should be
+    an instance of `Action`, and should work on the AST stack.  Every matched lexeme is
+    pushed to the AST stack.  An action may transform this stack by popping some number of
+    items off of it, constructing some AST, pushing that AST back to the stack, and then
+    returning the modified stack.
+
+    A couple things to note about this process:
+        - The stack that is passed to each action is immutable.  "Modifications" should be
+          made by simply constructing and returning a new stack.
+        - The top of the stack is the zero index (new items are pushed at the front).
+
+    If a parse is successful, the return value will be the AST stack at the end of the
+    parse.  It is up to the caller to verify that this is an expected result.
+
+    If a parse fails, the return value will be a tuple containing the erroneous lexeme and
+    a collection of expected tokens which failed to match it.
+
    ### Example:

    We generate a simple grammar:
@ -47,10 +69,10 @@ def parser(
    ...     Eof = auto()
    ...     def __repr__(self):
    ...         return self.name
-    >>> build_S = Action(lambda x: x[:-1])
-    >>> build_Sum = Action(lambda x: (*x[:-2], x[-1](int(x[-2][1]))))
-    >>> build_Sum_1 = Action(lambda x: (*x[:-2], lambda y: x[-1] + y))
-    >>> build_Sum_2 = Action(lambda x: (*x, lambda y: y))
+    >>> build_S = Action(lambda x: x[1:])
+    >>> build_Sum = Action(lambda x: (x[0](x[1][1]), *x[2:]))
+    >>> build_Sum_1 = Action(lambda x: (lambda y: x[0] + y, *x[2:]))
+    >>> build_Sum_2 = Action(lambda x: (lambda y: y, *x))
    >>> grammar = [
    ...     (SimpleVariable.S,    [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]),
    ...     (SimpleVariable.Sum,  [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]),
@ -104,7 +126,7 @@ def parser(
                        raise Exception('Not an LL(1) grammar!!!')
            # B [Token] (match)
            case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]):
-                return inner(popped_stack, (*ast_stack, lexemes[0]), lexemes[1:])
+                return inner(popped_stack, (lexemes[0], *ast_stack), lexemes[1:])
            # B [Token] (no match)
            case [top_of_stack, *popped_stack]:
                assert is_tok(top_of_stack)