Add parse_, expand some doc tests

This commit is contained in:
Emi Simpson 2023-03-07 20:07:59 -05:00
parent 08547aea2f
commit c4098d8c2b
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847
2 changed files with 85 additions and 10 deletions

View file

@@ -162,23 +162,89 @@ class Parser(Generic[Out, TokN]):
And now, for a few sample runs:
>>> parse_s.parse(lex_trivial('1'))
Ok([(1, [])])
>>> parse_s.parse_(lex_trivial('1'))
Ok(1)
>>> parse_s.parse(lex_trivial('1 + 2'))
Ok([(3, [])])
>>> parse_s.parse_(lex_trivial('1 + 2'))
Ok(3)
>>> parse_s.parse(lex_trivial('1 + 2 * 3 + 4'))
Ok([(11, [])])
>>> parse_s.parse_(lex_trivial('1 + 2 * 3 + 4'))
Ok(11)
>>> parse_s.parse(lex_trivial('(1 + 2) * (3 + 4)'))
Ok([(21, [])])
>>> parse_s.parse_(lex_trivial('(1 + 2) * (3 + 4)'))
Ok(21)
>>> parse_s.parse(lex_trivial('(1 + 2 * 3) * (4 + 5) + 6'))
Ok([(69, [])])
>>> parse_s.parse_(lex_trivial('(1 + 2 * 3) * (4 + 5) + 6'))
Ok(69)
And an example of a bad parse:
# TODO fix this
>>> parse_s.parse_(lex_trivial('1 * * 2')) #doctest: +ELLIPSIS
Err(...)
"""
parse: Callable[[LexStream[TokN]], ParserResult[Out, TokN]]
"""
Run this parser
Accepts a stream of tokens to parse, and returns either a list of possible successful
parses, each with a value and a remainder of the token stream, or a list of all
failures which lead to the failure of this parser, each with a lexeme they failed at
and the token the expected.
This is meant for use in constructing parsers, and it's probably more useful to call
`parse_()`.
"""
def parse_(self, input: LexStream[TokN]) -> Result[Out, Mapping[Lexeme[TokN], Collection[TokN]]]:
"""
Run this parser, expecting at most one result which consumes the full input stream
This is a wrapper around `parse()` with a few features for ease of use, including:
- expects that at most one single successful parse will be produced
- asserts that the parse consumed the whole input stream
- aggregates failures into a multi-set of `Lexeme` -> expected tokens
Normal failures are passed through as an `Err`, but if the results returned by a
successful parse violate the above conditions, an assertion error will be raised.
However, by carefully constructing your grammar, you can ensure that this will not
happen for any input.
Because of these assertions, the successful return value of this function is
*just* the output type of this parse, which is much easier to use.
### Example
A parser which parses a single number. Notice that we also require that it parses
an EOF. Without this, it would be unable to consume the entire input stream, and
thus would fail.
>>> parse_num = Parser.token(TrivialTok.NUM).seq_ignore_tok(TrivialTok.EOF)
>>> parse_num.parse_(lex_trivial('1312'))
Ok([NUM: '1312']@(1, 1-4))
"""
match self.parse(input):
case Err(errors):
# The input is bad
failure_locations = FSet(lex for (lex, expected) in errors)
return Err({
location: FSet(expected for (lex, expected) in errors if lex == location)
for location in failure_locations
})
case Ok([result1, result2, *rest] as possible_parses):
# The grammar is bad
raise AssertionError("Parse returned multiple possible parses", possible_parses)
case Ok([(value, [non_empty, *rest] as remainder)]):
# The grammar is bad
raise AssertionError("Parse failed to consume the whole input, and left remainder", remainder)
case Ok([]):
# The parser code is bad
raise AssertionError('"Successful" parse returned no possible parses')
case Ok([(value, [])]):
return Ok(value)
# The code in this function is bad
raise AssertionError('Unreachable')
@staticmethod
def epsilon(ret: Out) -> 'Parser[Out, TokN]':
@@ -361,6 +427,11 @@ class Parser(Generic[Out, TokN]):
>>> parse_maybe_num.parse(lex_trivial('3')) #doctest: +NORMALIZE_WHITESPACE
Ok([(3, [[EOF: '']@(1, 2-2)]),
(-1, [[NUM: '3']@(1, 1-1), [EOF: '']@(1, 2-2)])])
Of course, this can also produce multiple failures as well:
>>> parse_or.parse(lex_trivial('+'))
Err([([ADD: '+']@(1, 1-1), NUM), ([ADD: '+']@(1, 1-1), LTR)])
"""
all_parsers = (self, *parsers)
def inner(input: LexStream) -> ParserResult[Out, TokN]:

4
lex.py
View file

@@ -16,6 +16,10 @@ class Lexeme(Generic[B]):
col_end: int
def __repr__(self):
    # Render as e.g. [NUM: '1312']@(1, 1-4): token and matched text, then the
    # source position as (line, col_start-col_end).  This exact format is relied
    # upon by the doctests elsewhere in the project.
    return f'[{repr(self.token)}: {repr(self.matched_string)}]@({self.line}, {self.col_start}-{self.col_end})'
def get_token(self) -> B:
    """Return the token (of type `B`) that this lexeme was matched as."""
    return self.token
def get_match(self) -> str:
    """Return the exact source text that this lexeme matched."""
    return self.matched_string
def try_lex1(regex: Pattern[str], tok: A, input: str, line_no: int, col_no: int) -> Option[Tuple[Lexeme[A], str]]:
"""