Add parse_, expand some doc tests
parent 08547aea2f
commit c4098d8c2b

@@ -162,23 +162,89 @@ class Parser(Generic[Out, TokN]):
    And now, for a few sample runs:

    >>> parse_s.parse(lex_trivial('1'))
    Ok([(1, [])])
    >>> parse_s.parse_(lex_trivial('1'))
    Ok(1)

    >>> parse_s.parse(lex_trivial('1 + 2'))
    Ok([(3, [])])
    >>> parse_s.parse_(lex_trivial('1 + 2'))
    Ok(3)

    >>> parse_s.parse(lex_trivial('1 + 2 * 3 + 4'))
    Ok([(11, [])])
    >>> parse_s.parse_(lex_trivial('1 + 2 * 3 + 4'))
    Ok(11)

    >>> parse_s.parse(lex_trivial('(1 + 2) * (3 + 4)'))
    Ok([(21, [])])
    >>> parse_s.parse_(lex_trivial('(1 + 2) * (3 + 4)'))
    Ok(21)

    >>> parse_s.parse(lex_trivial('(1 + 2 * 3) * (4 + 5) + 6'))
    Ok([(69, [])])
    >>> parse_s.parse_(lex_trivial('(1 + 2 * 3) * (4 + 5) + 6'))
    Ok(69)

    And an example of a bad parse:

    # TODO fix this
    >>> parse_s.parse_(lex_trivial('1 * * 2')) #doctest: +ELLIPSIS
    Err(...)
    """
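
As a quick sanity check on the expected values in the sample runs above: the grammar evidently follows ordinary arithmetic precedence (`*` binds tighter than `+`), so plain Python arithmetic agrees with each expected result:

    assert 1 + 2 * 3 + 4 == 11
    assert (1 + 2) * (3 + 4) == 21
    assert (1 + 2 * 3) * (4 + 5) + 6 == 69
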
    parse: Callable[[LexStream[TokN]], ParserResult[Out, TokN]]
    """
    Run this parser

    Accepts a stream of tokens to parse, and returns either a list of possible successful
    parses, each with a value and a remainder of the token stream, or a list of all
    failures which led to the failure of this parser, each with the lexeme it failed at
    and the token it expected.

    This is meant for use in constructing parsers; for simply running one, `parse_()` is
    usually more convenient.
    """
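
For intuition about that return shape, the sketch below post-processes a raw `parse()`-style result by hand, which is roughly what `parse_()` automates. The data is illustrative plain Python, not this library's `LexStream`/`Result` types:

    # A hypothetical raw result: a list of (value, remaining tokens) pairs.
    raw = [(11, []), (7, ['+', '4'])]

    # Keep only the parses that consumed the entire input.
    complete = [value for (value, remainder) in raw if not remainder]
    assert complete == [11]
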
    def parse_(self, input: LexStream[TokN]) -> Result[Out, Mapping[Lexeme[TokN], Collection[TokN]]]:
        """
        Run this parser, expecting at most one result which consumes the full input stream

        This is a wrapper around `parse()` with a few features for ease of use, including:
        - expects that at most one successful parse will be produced
        - asserts that the parse consumed the whole input stream
        - aggregates failures into a mapping of `Lexeme` -> expected tokens

        Normal failures are passed through as an `Err`, but if the results returned by a
        successful parse violate the above conditions, an `AssertionError` will be raised.
        However, by carefully constructing your grammar, you can ensure that this will not
        happen for any input.

        Because of these assertions, the successful return value of this function is
        *just* the output type of this parser, which is much easier to work with.

        ### Example

        A parser which parses a single number. Notice that we also require that it parses
        an EOF. Without this, it would be unable to consume the entire input stream, and
        thus would fail.

        >>> parse_num = Parser.token(TrivialTok.NUM).seq_ignore_tok(TrivialTok.EOF)
        >>> parse_num.parse_(lex_trivial('1312'))
        Ok([NUM: '1312']@(1, 1-4))
        """
        match self.parse(input):
            case Err(errors):
                # The input is bad
                failure_locations = FSet(lex for (lex, expected) in errors)
                return Err({
                    location: FSet(expected for (lex, expected) in errors if lex == location)
                    for location in failure_locations
                })
            case Ok([result1, result2, *rest] as possible_parses):
                # The grammar is bad
                raise AssertionError("Parse returned multiple possible parses", possible_parses)
            case Ok([(value, [non_empty, *rest] as remainder)]):
                # The grammar is bad
                raise AssertionError("Parse failed to consume the whole input, and left remainder", remainder)
            case Ok([]):
                # The parser code is bad
                raise AssertionError('"Successful" parse returned no possible parses')
            case Ok([(value, [])]):
                return Ok(value)
        # The code in this function is bad
        raise AssertionError('Unreachable')
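
The error branch above groups a flat list of (lexeme, expected token) failures by location. A standalone sketch of that aggregation step, using plain strings and sets in place of `Lexeme` and `FSet`:

    # Hypothetical flat failure list, as parse() might report it.
    errors = [('[MUL]@(1, 5-5)', 'NUM'), ('[MUL]@(1, 5-5)', 'LPAR'), ('[EOF]@(1, 8-8)', 'NUM')]

    # One entry per failure location, each mapped to every token expected there.
    aggregated = {
        location: {expected for (lex, expected) in errors if lex == location}
        for location in {lex for (lex, _) in errors}
    }
    assert aggregated == {
        '[MUL]@(1, 5-5)': {'NUM', 'LPAR'},
        '[EOF]@(1, 8-8)': {'NUM'},
    }
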
    @staticmethod
    def epsilon(ret: Out) -> 'Parser[Out, TokN]':

@@ -361,6 +427,11 @@ class Parser(Generic[Out, TokN]):
        >>> parse_maybe_num.parse(lex_trivial('3')) #doctest: +NORMALIZE_WHITESPACE
        Ok([(3, [[EOF: '']@(1, 2-2)]),
            (-1, [[NUM: '3']@(1, 1-1), [EOF: '']@(1, 2-2)])])

        Of course, this can produce multiple failures as well:

        >>> parse_or.parse(lex_trivial('+'))
        Err([([ADD: '+']@(1, 1-1), NUM), ([ADD: '+']@(1, 1-1), LTR)])
        """
        all_parsers = (self, *parsers)
        def inner(input: LexStream) -> ParserResult[Out, TokN]:

4  lex.py

@@ -16,6 +16,10 @@ class Lexeme(Generic[B]):
    col_end: int

    def __repr__(self):
        return f'[{repr(self.token)}: {repr(self.matched_string)}]@({self.line}, {self.col_start}-{self.col_end})'

    def get_token(self) -> B:
        return self.token

    def get_match(self) -> str:
        return self.matched_string


def try_lex1(regex: Pattern[str], tok: A, input: str, line_no: int, col_no: int) -> Option[Tuple[Lexeme[A], str]]:
    """
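
The docstring of `try_lex1` is cut off in this view. Judging only from its signature, it tries a single token's regex at the current position and, on a match, yields the lexeme plus the unconsumed remainder. A self-contained stand-in under that assumption (a guess from the signature, not the repository's implementation, and using `Optional` rather than its `Option` type):

    import re
    from typing import Optional, Tuple

    def try_lex1_sketch(regex: re.Pattern, tok: str, input: str,
                        line_no: int, col_no: int) -> Optional[Tuple[str, str, str]]:
        # Match only at the start of the remaining input; None means "this token doesn't apply here".
        m = regex.match(input)
        if m is None:
            return None
        return (tok, m.group(0), input[m.end():])

    assert try_lex1_sketch(re.compile(r'[0-9]+'), 'NUM', '1312 + 1', 1, 1) == ('NUM', '1312', ' + 1')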