Reverse parser direction + doc
This commit is contained in:
parent
3afed0c2e0
commit
f692e8dcca
32
parse.py
32
parse.py
|
@ -7,6 +7,11 @@ from typing import Callable, Collection, Mapping, TypeGuard
|
|||
|
||||
@dataclass(frozen=True)
|
||||
class Action(Generic[A]):
|
||||
"""
|
||||
Denotes an action annotation.
|
||||
|
||||
See `parser()`
|
||||
"""
|
||||
f: Callable[[Sequence[A]], Sequence[A]]
|
||||
def __call__(self, i: Sequence[A]) -> Sequence[A]:
|
||||
return self.f(i)
|
||||
|
@ -33,6 +38,23 @@ def parser(
|
|||
token that it represents. This allows for the actual lexemes that are being fed in to
|
||||
be more complex, and store additional data.
|
||||
|
||||
The oracle table may include "action" annotations in its sequences. Actions should be
|
||||
an instance of `Action`, and should work on the AST stack. Every matched lexeme is
|
||||
pushed to the AST stack. An action may transform this stack by popping some number of
|
||||
items off of it, constructing some AST, pushing that AST back to the stack, and then
|
||||
returning the modified stack.
|
||||
|
||||
A couple things to note about this process:
|
||||
- The stack that is passed to each action is immutable. "Modifications" should be
|
||||
made by simply constructing and returning a new stack.
|
||||
- The top of the stack is the zero index (new items are pushed at the front).
|
||||
|
||||
If a parse is successful, the return value will be the AST stack at the end of the
|
||||
parse. It is up to the caller to verify that this is an expected result.
|
||||
|
||||
If a parse fails, the return value will be a tuple containing the erroneous lexeme and
|
||||
a collection of expected tokens which failed to match it.
|
||||
|
||||
### Example:
|
||||
|
||||
We generate a simple grammar:
|
||||
|
@ -47,10 +69,10 @@ def parser(
|
|||
... Eof = auto()
|
||||
... def __repr__(self):
|
||||
... return self.name
|
||||
>>> build_S = Action(lambda x: x[:-1])
|
||||
>>> build_Sum = Action(lambda x: (*x[:-2], x[-1](int(x[-2][1]))))
|
||||
>>> build_Sum_1 = Action(lambda x: (*x[:-2], lambda y: x[-1] + y))
|
||||
>>> build_Sum_2 = Action(lambda x: (*x, lambda y: y))
|
||||
>>> build_S = Action(lambda x: x[1:])
|
||||
>>> build_Sum = Action(lambda x: (x[0](x[1][1]), *x[2:]))
|
||||
>>> build_Sum_1 = Action(lambda x: (lambda y: x[0] + y, *x[2:]))
|
||||
>>> build_Sum_2 = Action(lambda x: (lambda y: y, *x))
|
||||
>>> grammar = [
|
||||
... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]),
|
||||
... (SimpleVariable.Sum, [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]),
|
||||
|
@ -104,7 +126,7 @@ def parser(
|
|||
raise Exception('Not an LL(1) grammar!!!')
|
||||
# B [Token] (match)
|
||||
case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]):
|
||||
return inner(popped_stack, (*ast_stack, lexemes[0]), lexemes[1:])
|
||||
return inner(popped_stack, (lexemes[0], *ast_stack), lexemes[1:])
|
||||
# B [Token] (no match)
|
||||
case [top_of_stack, *popped_stack]:
|
||||
assert is_tok(top_of_stack)
|
||||
|
|
Loading…
Reference in a new issue