Add parse_, expand some doc tests

This commit is contained in:
Emi Simpson 2023-03-07 20:07:59 -05:00
parent 08547aea2f
commit c4098d8c2b
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847
2 changed files with 85 additions and 10 deletions

View file

@@ -162,23 +162,89 @@ class Parser(Generic[Out, TokN]):
And now, for a few sample runs:
>>> parse_s.parse(lex_trivial('1'))
Ok([(1, [])])
>>> parse_s.parse_(lex_trivial('1'))
Ok(1)
>>> parse_s.parse(lex_trivial('1 + 2'))
Ok([(3, [])])
>>> parse_s.parse_(lex_trivial('1 + 2'))
Ok(3)
>>> parse_s.parse(lex_trivial('1 + 2 * 3 + 4'))
Ok([(11, [])])
>>> parse_s.parse_(lex_trivial('1 + 2 * 3 + 4'))
Ok(11)
>>> parse_s.parse(lex_trivial('(1 + 2) * (3 + 4)'))
Ok([(21, [])])
>>> parse_s.parse_(lex_trivial('(1 + 2) * (3 + 4)'))
Ok(21)
>>> parse_s.parse(lex_trivial('(1 + 2 * 3) * (4 + 5) + 6'))
Ok([(69, [])])
>>> parse_s.parse_(lex_trivial('(1 + 2 * 3) * (4 + 5) + 6'))
Ok(69)
And an example of a bad parse:
# TODO fix this
>>> parse_s.parse_(lex_trivial('1 * * 2')) #doctest: +ELLIPSIS
Err(...)
"""
parse: Callable[[LexStream[TokN]], ParserResult[Out, TokN]]
"""
Run this parser
Accepts a stream of tokens to parse, and returns either a list of possible successful
parses, each with a value and a remainder of the token stream, or a list of all
failures which lead to the failure of this parser, each with a lexeme they failed at
and the token the expected.
This is meant for use in constructing parsers, and it's probably more useful to call
`parse_()`.
"""
def parse_(self, input: LexStream[TokN]) -> Result[Out, Mapping[Lexeme[TokN], Collection[TokN]]]:
"""
Run this parser, expecting at most one result which consumes the full input stream
This is a wrapper around `parse()` with a few features for ease of use, including:
- expects that at most one single successful parse will be produced
- asserts that the parse consumed the whole input stream
- aggregates failures into a multi-set of `Lexeme` -> expected tokens
Normal failures are passed through as an `Err`, but if the results returned by a
successful parse violate the above conditions, an assertion error will be raised.
However, by carefully constructing your grammar, you can ensure that this will not
happen for any input.
Because of these assertions, the successful return value of this function is
*just* the output type of this parse, which is much easier to use.
### Example
A parser which parses a single number. Notice that we also require that it parses
an EOF. Without this, it would be unable to consume the entire input stream, and
thus would fail.
>>> parse_num = Parser.token(TrivialTok.NUM).seq_ignore_tok(TrivialTok.EOF)
>>> parse_num.parse_(lex_trivial('1312'))
Ok([NUM: '1312']@(1, 1-4))
"""
match self.parse(input):
case Err(errors):
# The input is bad
failure_locations = FSet(lex for (lex, expected) in errors)
return Err({
location: FSet(expected for (lex, expected) in errors if lex == location)
for location in failure_locations
})
case Ok([result1, result2, *rest] as possible_parses):
# The grammar is bad
raise AssertionError("Parse returned multiple possible parses", possible_parses)
case Ok([(value, [non_empty, *rest] as remainder)]):
# The grammar is bad
raise AssertionError("Parse failed to consume the whole input, and left remainder", remainder)
case Ok([]):
# The parser code is bad
raise AssertionError('"Successful" parse returned no possible parses')
case Ok([(value, [])]):
return Ok(value)
# The code in this function is bad
raise AssertionError('Unreachable')
@staticmethod
def epsilon(ret: Out) -> 'Parser[Out, TokN]':
@@ -361,6 +427,11 @@ class Parser(Generic[Out, TokN]):
>>> parse_maybe_num.parse(lex_trivial('3')) #doctest: +NORMALIZE_WHITESPACE
Ok([(3, [[EOF: '']@(1, 2-2)]),
(-1, [[NUM: '3']@(1, 1-1), [EOF: '']@(1, 2-2)])])
Of course, this can also produce multiple failures as well:
>>> parse_or.parse(lex_trivial('+'))
Err([([ADD: '+']@(1, 1-1), NUM), ([ADD: '+']@(1, 1-1), LTR)])
"""
all_parsers = (self, *parsers)
def inner(input: LexStream) -> ParserResult[Out, TokN]:

4
lex.py
View file

@@ -16,6 +16,10 @@ class Lexeme(Generic[B]):
col_end: int
def __repr__(self):
    # Render as e.g. [NUM: '1312']@(1, 1-4): token and matched text, then the
    # source position as (line, col_start-col_end).  This exact format is relied
    # upon by the doctests elsewhere in the project.
    return f'[{repr(self.token)}: {repr(self.matched_string)}]@({self.line}, {self.col_start}-{self.col_end})'
def get_token(self) -> B:
    """Return the token (of type `B`) that this lexeme was matched as."""
    return self.token
def get_match(self) -> str:
    """Return the exact source text that this lexeme matched."""
    return self.matched_string
def try_lex1(regex: Pattern[str], tok: A, input: str, line_no: int, col_no: int) -> Option[Tuple[Lexeme[A], str]]:
"""