Factor out IR and Tok
parent 9e6c7d504d
commit f84a340f0a

grammar.py (64 lines changed)
@@ -12,52 +12,13 @@ from dataclasses import dataclass
from enum import auto, IntEnum
from re import compile, Pattern

from ir import IRNeg, IRProp, IRTerm, IRVar
from lex import Lexeme
from parse import Action
from tokens import *

from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias

class Tok(IntEnum):
    """
    All possible tokens used in the grammar
    """
    Newline = auto()
    Whitespace = auto()
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()
    Identifier = auto()
    Eof = auto()

    def __repr__(self):
        return self._name_

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(r"\n"), Tok.Newline),
    (compile(r"[ \t]+"), Tok.Whitespace),
    (compile("Predicates:"), Tok.PredicateSection),
    (compile("Variables:"), Tok.VariablesSection),
    (compile("Constants:"), Tok.ConstantsSection),
    (compile("Functions:"), Tok.FunctionsSection),
    (compile("Clauses:"), Tok.ClausesSection),
    (compile("!"), Tok.Negate),
    (compile(r"\("), Tok.OpenP),
    (compile(r"\)"), Tok.CloseP),
    (compile(","), Tok.Comma),
    (compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexes to the tokens they identify

Tokens earlier in the list should be regarded as higher priority, even if a pattern lower
on the list also matches. All unicode strings should be matched by at least one token.
"""

class Variable(IntEnum):
    Start = auto()

@@ -179,27 +140,6 @@ class ASTProp:
        arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
        return map_res(p(IRProp, self.ident), arg_ir)

@dataclass(frozen=True)
class IRProp:
    lexeme: Lexeme[Tok]
    arguments: 'Sequence[IRTerm]'
    def __str__(self) -> str:
        return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'

@dataclass(frozen=True)
class IRVar:
    lexeme: Lexeme[Tok]
    def __str__(self) -> str:
        return f'*{self.lexeme.matched_string}'

@dataclass(frozen=True)
class IRNeg:
    inner: 'IRTerm'
    def __str__(self) -> str:
        return f'¬{self.inner}'

IRTerm: TypeAlias = IRVar | IRProp | IRNeg

@cur2
def make_ir(
    idents: IdentBindings,

ir.py (new file, 26 lines)
@@ -0,0 +1,26 @@
from dataclasses import dataclass
from typing import Sequence, TypeAlias

from lex import Lexeme
from tokens import Tok

@dataclass(frozen=True)
class IRProp:
    lexeme: Lexeme[Tok]
    arguments: 'Sequence[IRTerm]'
    def __str__(self) -> str:
        return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'

@dataclass(frozen=True)
class IRVar:
    lexeme: Lexeme[Tok]
    def __str__(self) -> str:
        return f'*{self.lexeme.matched_string}'

@dataclass(frozen=True)
class IRNeg:
    inner: 'IRTerm'
    def __str__(self) -> str:
        return f'¬{self.inner}'

IRTerm: TypeAlias = IRVar | IRProp | IRNeg
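
As a quick illustration of how the factored-out IR classes compose and render: a minimal sketch, using a stand-in for lex.Lexeme since Lexeme's constructor is not part of this commit — only the matched_string attribute that the __str__ methods read is assumed here.

from dataclasses import dataclass

from ir import IRNeg, IRProp, IRVar

# Stand-in for lex.Lexeme (hypothetical): only matched_string is assumed,
# since that is the one attribute the __str__ implementations above read.
@dataclass(frozen=True)
class FakeLexeme:
    matched_string: str

# Build ¬P(*x, c()) — a negated proposition applied to a variable and a
# zero-argument proposition.
term = IRNeg(IRProp(FakeLexeme("P"), (
    IRVar(FakeLexeme("x")),
    IRProp(FakeLexeme("c"), ()),
)))
print(term)  # ¬P(*x,c())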

tokens.py (new file, 45 lines)
@@ -0,0 +1,45 @@
from enum import auto, IntEnum
from typing import Collection, Tuple
from re import compile, Pattern

class Tok(IntEnum):
    """
    All possible tokens used in the grammar
    """
    Newline = auto()
    Whitespace = auto()
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()
    Identifier = auto()
    Eof = auto()

    def __repr__(self):
        return self._name_

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(r"\n"), Tok.Newline),
    (compile(r"[ \t]+"), Tok.Whitespace),
    (compile("Predicates:"), Tok.PredicateSection),
    (compile("Variables:"), Tok.VariablesSection),
    (compile("Constants:"), Tok.ConstantsSection),
    (compile("Functions:"), Tok.FunctionsSection),
    (compile("Clauses:"), Tok.ClausesSection),
    (compile("!"), Tok.Negate),
    (compile(r"\("), Tok.OpenP),
    (compile(r"\)"), Tok.CloseP),
    (compile(","), Tok.Comma),
    (compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexes to the tokens they identify

Tokens earlier in the list should be regarded as higher priority, even if a pattern lower
on the list also matches. All unicode strings should be matched by at least one token.
"""
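
For context, a minimal sketch of a loop that honors LEX_TABLE's documented priority contract: the first table entry that matches at the current position wins, even when a later entry would also match. This is illustrative only — the project's actual lexer lives in lex.py, which is not shown in this commit; note it also explains why Tok.Eof has no table entry, since the lexer can append it after consuming the input.

from tokens import LEX_TABLE, Tok

def tokenize(src: str) -> list[tuple[Tok, str]]:
    out: list[tuple[Tok, str]] = []
    pos = 0
    while pos < len(src):
        # Scan the table in order; the first pattern to match wins.
        for pattern, tok in LEX_TABLE:
            m = pattern.match(src, pos)
            if m:
                out.append((tok, m.group()))
                pos = m.end()
                break
        else:
            # The docstring promises every string is matched by some token.
            raise ValueError(f"unlexable input at position {pos}")
    out.append((Tok.Eof, ""))
    return out

tokenize("Clauses:\n!P(x, y)")
# [(ClausesSection, 'Clauses:'), (Newline, '\n'), (Negate, '!'),
#  (Identifier, 'P'), (OpenP, '('), (Identifier, 'x'), (Comma, ','),
#  (Whitespace, ' '), (Identifier, 'y'), (CloseP, ')'), (Eof, '')]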