Reverse parser direction + doc
This commit is contained in:
parent
3afed0c2e0
commit
f692e8dcca
32
parse.py
32
parse.py
|
@ -7,6 +7,11 @@ from typing import Callable, Collection, Mapping, TypeGuard
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class Action(Generic[A]):
|
class Action(Generic[A]):
|
||||||
|
"""
|
||||||
|
Denotes an action annotation.
|
||||||
|
|
||||||
|
See `parser()`
|
||||||
|
"""
|
||||||
f: Callable[[Sequence[A]], Sequence[A]]
|
f: Callable[[Sequence[A]], Sequence[A]]
|
||||||
def __call__(self, i: Sequence[A]) -> Sequence[A]:
|
def __call__(self, i: Sequence[A]) -> Sequence[A]:
|
||||||
return self.f(i)
|
return self.f(i)
|
||||||
|
@ -33,6 +38,23 @@ def parser(
|
||||||
token that it represents. This allows for the actual lexemes that are being fed in to
|
token that it represents. This allows for the actual lexemes that are being fed in to
|
||||||
be more complex, and store additional data.
|
be more complex, and store additional data.
|
||||||
|
|
||||||
|
The oracle table may include "action" annotations in its sequences. Actions should be
|
||||||
|
an instance of `Action`, and should work on the AST stack. Every matched lexeme is
|
||||||
|
pushed to the AST stack. An action may transform this stack by popping some number of
|
||||||
|
items off of it, constructing some AST, pushing that AST back to the stack, and then
|
||||||
|
returning the modified stack.
|
||||||
|
|
||||||
|
A couple things to note about this process:
|
||||||
|
- The stack that is passed to each action is immutable. "Modifications" should be
|
||||||
|
made by simply constructing and returning a new stack.
|
||||||
|
- The top of the stack is the zero index.
|
||||||
|
|
||||||
|
If a parse is successful, the return value will be the AST stack at the end of the
|
||||||
|
parse. It is up to the caller to verify that this is an expected result.
|
||||||
|
|
||||||
|
If a parse fails, the return value will be a tuple containing the erroneous lexeme and
|
||||||
|
a collection of expected tokens which failed to match it.
|
||||||
|
|
||||||
### Example:
|
### Example:
|
||||||
|
|
||||||
We generate a simple grammar:
|
We generate a simple grammar:
|
||||||
|
@ -47,10 +69,10 @@ def parser(
|
||||||
... Eof = auto()
|
... Eof = auto()
|
||||||
... def __repr__(self):
|
... def __repr__(self):
|
||||||
... return self.name
|
... return self.name
|
||||||
>>> build_S = Action(lambda x: x[:-1])
|
>>> build_S = Action(lambda x: x[1:])
|
||||||
>>> build_Sum = Action(lambda x: (*x[:-2], x[-1](int(x[-2][1]))))
|
>>> build_Sum = Action(lambda x: (x[0](x[1][1]), *x[2:]))
|
||||||
>>> build_Sum_1 = Action(lambda x: (*x[:-2], lambda y: x[-1] + y))
|
>>> build_Sum_1 = Action(lambda x: (lambda y: x[0] + y, *x[2:]))
|
||||||
>>> build_Sum_2 = Action(lambda x: (*x, lambda y: y))
|
>>> build_Sum_2 = Action(lambda x: (lambda y: y, *x))
|
||||||
>>> grammar = [
|
>>> grammar = [
|
||||||
... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]),
|
... (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]),
|
||||||
... (SimpleVariable.Sum, [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]),
|
... (SimpleVariable.Sum, [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]),
|
||||||
|
@ -104,7 +126,7 @@ def parser(
|
||||||
raise Exception('Not an LL(1) grammar!!!')
|
raise Exception('Not an LL(1) grammar!!!')
|
||||||
# B [Token] (match)
|
# B [Token] (match)
|
||||||
case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]):
|
case [top_of_stack, *popped_stack] if top_of_stack == identify_lexeme(lexemes[0]):
|
||||||
return inner(popped_stack, (*ast_stack, lexemes[0]), lexemes[1:])
|
return inner(popped_stack, (lexemes[0], *ast_stack), lexemes[1:])
|
||||||
# B [Token] (no match)
|
# B [Token] (no match)
|
||||||
case [top_of_stack, *popped_stack]:
|
case [top_of_stack, *popped_stack]:
|
||||||
assert is_tok(top_of_stack)
|
assert is_tok(top_of_stack)
|
||||||
|
|
Loading…
Reference in a new issue