"""Token kinds and the regex table used to recognise them."""

from enum import auto, IntEnum
from typing import Collection, Tuple
from re import compile, Pattern


class Tok(IntEnum):
    """All possible tokens used in the grammar."""

    Whitespace = auto()
    OpenCurly = auto()
    CloseCurly = auto()
    OpenSquare = auto()
    CloseSquare = auto()
    Comma = auto()
    Colon = auto()
    String = auto()
    Number = auto()
    # Eof has no entry in LEX_TABLE; presumably the lexer emits it itself at
    # end of input -- confirm against the consumer of this table.
    Eof = auto()

    def __repr__(self):
        """Return the bare member name (``Comma``) rather than ``<Tok.Comma: 6>``."""
        return self.name


LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    # ``\s`` already includes newlines, so no separate ``\n`` is needed.
    (compile(r"\s+"), Tok.Whitespace),
    (compile(r"{"), Tok.OpenCurly),
    (compile(r"}"), Tok.CloseCurly),
    (compile(r"\["), Tok.OpenSquare),
    (compile(r"\]"), Tok.CloseSquare),
    (compile(r","), Tok.Comma),
    (compile(r":"), Tok.Colon),
    # A double-quoted string.  A backslash escapes the character after it, so
    # an escaped quote (``\"``) does not terminate the token.  ``(?s)`` lets
    # ``.`` match a newline so a backslash followed by a line break is still
    # accepted, as raw newlines inside strings already are.
    (compile(r'(?s)"(?:[^"\\]|\\.)*"'), Tok.String),
    # One or more digits only: no sign, fraction or exponent.
    (compile(r"\d+"), Tok.Number),
]
"""
A mapping of regexes to the tokens they identify.

Tokens earlier in the list should be regarded as higher priority, even if a
pattern lower in the list also matches.

NOTE(review): the original note said every unicode string should be matched by
at least one token, but the table has no catch-all entry (bare letters, for
example, match nothing) -- confirm how the consuming lexer handles input that
no pattern matches.
"""