# JSON-Lang/tokens.py

from enum import auto, IntEnum
from typing import Collection, Tuple
from re import compile, Pattern

class Tok(IntEnum):
	"""
	Every kind of token the grammar's lexer can produce.

	Values are assigned by ``auto()`` in declaration order, and because this is
	an ``IntEnum`` each member also behaves as a plain integer.
	"""
	# Layout
	Newline = auto()
	Whitespace = auto()

	# Section header keywords
	PredicateSection = auto()
	VariablesSection = auto()
	ConstantsSection = auto()
	FunctionsSection = auto()
	ClausesSection = auto()

	# Punctuation / operators
	Negate = auto()
	OpenP = auto()
	CloseP = auto()
	Comma = auto()

	# Names
	Identifier = auto()

	# NOTE(review): presumably emitted by the lexer at end of input; no
	# LEX_TABLE pattern produces it -- confirm against the lexer.
	Eof = auto()

	def __repr__(self) -> str:
		# Show just the member name instead of the default ``<Tok.X: n>`` form.
		return self.name

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
	(compile(pattern), token)
	for pattern, token in (
		# Layout
		(r"\n", Tok.Newline),
		(r"[ \t]+", Tok.Whitespace),
		# Section header keywords
		(r"Predicates:", Tok.PredicateSection),
		(r"Variables:", Tok.VariablesSection),
		(r"Constants:", Tok.ConstantsSection),
		(r"Functions:", Tok.FunctionsSection),
		(r"Clauses:", Tok.ClausesSection),
		# Punctuation / operators
		(r"!", Tok.Negate),
		(r"\(", Tok.OpenP),
		(r"\)", Tok.CloseP),
		(r",", Tok.Comma),
		# Names
		(r"\w+", Tok.Identifier),
	)
]
"""
Ordered pairs of (compiled regex, token) used to identify each token.

Position in the list is priority: an entry nearer the top should win even when
a later entry also matches.  For example, the section keywords precede the
generic identifier pattern, which would otherwise match their leading word.

The intent is that every unicode string is matched by at least one entry.
NOTE(review): there is no catch-all entry here, so characters outside these
patterns (e.g. "@") match nothing -- confirm how the lexer handles that case.
"""
Factor out IR and Tok 2023-03-05 21:44:30 +00:00
			`from enum import auto, IntEnum`
			`from typing import Collection, Tuple`
			`from re import compile, Pattern`

			`class Tok(IntEnum):`
			`"""`
			`All possible tokens used in the grammar`
			`"""`
			`Newline = auto()`
			`Whitespace = auto()`
			`PredicateSection = auto()`
			`VariablesSection = auto()`
			`ConstantsSection = auto()`
			`FunctionsSection = auto()`
			`ClausesSection = auto()`
			`Negate = auto()`
			`OpenP = auto()`
			`CloseP = auto()`
			`Comma = auto()`
			`Identifier = auto()`
			`Eof = auto()`

			`def __repr__(self):`
			`return self._name_`

			`LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [`
			`(compile(r"\n"), Tok.Newline),`
			`(compile(r"[ \t]+"), Tok.Whitespace),`
			`(compile("Predicates:"), Tok.PredicateSection),`
			`(compile("Variables:"), Tok.VariablesSection),`
			`(compile("Constants:"), Tok.ConstantsSection),`
			`(compile("Functions:"), Tok.FunctionsSection),`
			`(compile("Clauses:"), Tok.ClausesSection),`
			`(compile("!"), Tok.Negate),`
			`(compile(r"\("), Tok.OpenP),`
			`(compile(r"\)"), Tok.CloseP),`
			`(compile(","), Tok.Comma),`
			`(compile(r"\w+"), Tok.Identifier),`
			`]`
			`"""`
			`A mapping of regexs to the tokens the identify`

			`Tokens earlier on in the list should be regarded as higher priority, even if a match lower`
			`on the list also matches. All unicode strings should be matched by at least one token.`
			`"""`