From f84a340f0aba4fb4111f395d0e3f296ed403b455 Mon Sep 17 00:00:00 2001
From: Emi Simpson
Date: Sun, 5 Mar 2023 16:44:30 -0500
Subject: [PATCH] Factor out IR and Tok

---
 grammar.py | 64 ++----------------------------------------------------
 ir.py      | 26 ++++++++++++++++++++++
 tokens.py  | 45 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 62 deletions(-)
 create mode 100644 ir.py
 create mode 100644 tokens.py

diff --git a/grammar.py b/grammar.py
index 015f9fe..9e4a13f 100644
--- a/grammar.py
+++ b/grammar.py
@@ -12,52 +12,13 @@
 from dataclasses import dataclass
 from enum import auto, IntEnum
 from re import compile, Pattern
+from ir import IRNeg, IRProp, IRTerm, IRVar
 from lex import Lexeme
 from parse import Action
+from tokens import *
 from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
 
 
-class Tok(IntEnum):
-	"""
-	All possible tokens used in the grammar
-	"""
-	Newline = auto()
-	Whitespace = auto()
-	PredicateSection = auto()
-	VariablesSection = auto()
-	ConstantsSection = auto()
-	FunctionsSection = auto()
-	ClausesSection = auto()
-	Negate = auto()
-	OpenP = auto()
-	CloseP = auto()
-	Comma = auto()
-	Identifier = auto()
-	Eof = auto()
-
-	def __repr__(self):
-		return self._name_
-
-LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
-	(compile(r"\n"), Tok.Newline),
-	(compile(r"[ \t]+"), Tok.Whitespace),
-	(compile("Predicates:"), Tok.PredicateSection),
-	(compile("Variables:"), Tok.VariablesSection),
-	(compile("Constants:"), Tok.ConstantsSection),
-	(compile("Functions:"), Tok.FunctionsSection),
-	(compile("Clauses:"), Tok.ClausesSection),
-	(compile("!"), Tok.Negate),
-	(compile(r"\("), Tok.OpenP),
-	(compile(r"\)"), Tok.CloseP),
-	(compile(","), Tok.Comma),
-	(compile(r"\w+"), Tok.Identifier),
-]
-"""
-A mapping of regexs to the tokens the identify
-
-Tokens earlier on in the list should be regarded as higher priority, even if a match lower
-on the list also matches. All unicode strings should be matched by at least one token.
-"""
 
 class Variable(IntEnum):
 	Start = auto()
@@ -179,27 +140,6 @@ class ASTProp:
 		arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
 		return map_res(p(IRProp, self.ident), arg_ir)
 
-@dataclass(frozen=True)
-class IRProp:
-	lexeme: Lexeme[Tok]
-	arguments: 'Sequence[IRTerm]'
-	def __str__(self) -> str:
-		return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'
-
-@dataclass(frozen=True)
-class IRVar:
-	lexeme: Lexeme[Tok]
-	def __str__(self) -> str:
-		return f'*{self.lexeme.matched_string}'
-
-@dataclass(frozen=True)
-class IRNeg:
-	inner: 'IRTerm'
-	def __str__(self) -> str:
-		return f'¬{self.inner}'
-
-IRTerm: TypeAlias = IRVar | IRProp | IRNeg
-
 @cur2
 def make_ir(
 	idents: IdentBindings,
diff --git a/ir.py b/ir.py
new file mode 100644
index 0000000..ebb239e
--- /dev/null
+++ b/ir.py
@@ -0,0 +1,26 @@
+from dataclasses import dataclass
+from typing import Sequence, TypeAlias
+
+from lex import Lexeme
+from tokens import Tok
+
+@dataclass(frozen=True)
+class IRProp:
+	lexeme: Lexeme[Tok]
+	arguments: 'Sequence[IRTerm]'
+	def __str__(self) -> str:
+		return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'
+
+@dataclass(frozen=True)
+class IRVar:
+	lexeme: Lexeme[Tok]
+	def __str__(self) -> str:
+		return f'*{self.lexeme.matched_string}'
+
+@dataclass(frozen=True)
+class IRNeg:
+	inner: 'IRTerm'
+	def __str__(self) -> str:
+		return f'¬{self.inner}'
+
+IRTerm: TypeAlias = IRVar | IRProp | IRNeg
\ No newline at end of file
diff --git a/tokens.py b/tokens.py
new file mode 100644
index 0000000..897501d
--- /dev/null
+++ b/tokens.py
@@ -0,0 +1,45 @@
+from enum import auto, IntEnum
+from typing import Collection, Tuple
+from re import compile, Pattern
+
+class Tok(IntEnum):
+	"""
+	All possible tokens used in the grammar
+	"""
+	Newline = auto()
+	Whitespace = auto()
+	PredicateSection = auto()
+	VariablesSection = auto()
+	ConstantsSection = auto()
+	FunctionsSection = auto()
+	ClausesSection = auto()
+	Negate = auto()
+	OpenP = auto()
+	CloseP = auto()
+	Comma = auto()
+	Identifier = auto()
+	Eof = auto()
+
+	def __repr__(self):
+		return self._name_
+
+LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
+	(compile(r"\n"), Tok.Newline),
+	(compile(r"[ \t]+"), Tok.Whitespace),
+	(compile("Predicates:"), Tok.PredicateSection),
+	(compile("Variables:"), Tok.VariablesSection),
+	(compile("Constants:"), Tok.ConstantsSection),
+	(compile("Functions:"), Tok.FunctionsSection),
+	(compile("Clauses:"), Tok.ClausesSection),
+	(compile("!"), Tok.Negate),
+	(compile(r"\("), Tok.OpenP),
+	(compile(r"\)"), Tok.CloseP),
+	(compile(","), Tok.Comma),
+	(compile(r"\w+"), Tok.Identifier),
+]
+"""
+A mapping of regexes to the tokens they identify
+
+Tokens earlier on in the list should be regarded as higher priority, even if a match lower
+on the list also matches. All unicode strings should be matched by at least one token.
+"""
\ No newline at end of file