# JSON-Lang/grammar.py

from enum import auto, IntEnum
from typing import Collection, Tuple
from re import compile, Pattern

class Tok(IntEnum):
	"""
	Token kinds that the grammar is built from.

	Members receive their integer values from ``auto()`` in declaration order
	(consecutive integers starting at 1).
	"""
	# Layout
	Whitespace = auto()

	# Bracketing punctuation
	OpenCurly = auto()
	CloseCurly = auto()
	OpenSquare = auto()
	CloseSquare = auto()

	# Separators
	Comma = auto()
	Colon = auto()

	# Literals
	String = auto()
	Number = auto()

	# No regex in LEX_TABLE produces this one; presumably the lexer emits it
	# itself at end of input -- confirm against the lexer.
	Eof = auto()

	def __repr__(self) -> str:
		# Show just the member name (e.g. ``Comma``) rather than the default
		# ``<Tok.Comma: 6>`` form.
		return self.name

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
	(compile(source), token)
	for source, token in (
		(r"[\s\n]+", Tok.Whitespace),
		(r"{", Tok.OpenCurly),
		(r"}", Tok.CloseCurly),
		(r"\[", Tok.OpenSquare),
		(r"\]", Tok.CloseSquare),
		(r",", Tok.Comma),
		(r":", Tok.Colon),
		# Everything up to the next double quote; there is no escape handling,
		# so a string body cannot itself contain a double quote.
		(r'"[^"]*"', Tok.String),
		# One or more digits only: no sign, fraction, or exponent.
		(r'\d+', Tok.Number),
	)
]
"""
Pairs each compiled regex with the token it identifies.

Order is significant: an entry nearer the top of the list takes priority over
any later entry that would also match.  The intent is that every unicode
string is matched by at least one entry.
"""