Factor out IR and Tok
This commit is contained in:
parent
9e6c7d504d
commit
f84a340f0a
64
grammar.py
64
grammar.py
|
@@ -12,52 +12,13 @@ from dataclasses import dataclass
|
||||||
from enum import auto, IntEnum
|
from enum import auto, IntEnum
|
||||||
from re import compile, Pattern
|
from re import compile, Pattern
|
||||||
|
|
||||||
|
from ir import IRNeg, IRProp, IRTerm, IRVar
|
||||||
from lex import Lexeme
|
from lex import Lexeme
|
||||||
from parse import Action
|
from parse import Action
|
||||||
|
from tokens import *
|
||||||
|
|
||||||
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
|
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
|
||||||
|
|
||||||
class Tok(IntEnum):
|
|
||||||
"""
|
|
||||||
All possible tokens used in the grammar
|
|
||||||
"""
|
|
||||||
Newline = auto()
|
|
||||||
Whitespace = auto()
|
|
||||||
PredicateSection = auto()
|
|
||||||
VariablesSection = auto()
|
|
||||||
ConstantsSection = auto()
|
|
||||||
FunctionsSection = auto()
|
|
||||||
ClausesSection = auto()
|
|
||||||
Negate = auto()
|
|
||||||
OpenP = auto()
|
|
||||||
CloseP = auto()
|
|
||||||
Comma = auto()
|
|
||||||
Identifier = auto()
|
|
||||||
Eof = auto()
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return self._name_
|
|
||||||
|
|
||||||
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
|
|
||||||
(compile(r"\n"), Tok.Newline),
|
|
||||||
(compile(r"[ \t]+"), Tok.Whitespace),
|
|
||||||
(compile("Predicates:"), Tok.PredicateSection),
|
|
||||||
(compile("Variables:"), Tok.VariablesSection),
|
|
||||||
(compile("Constants:"), Tok.ConstantsSection),
|
|
||||||
(compile("Functions:"), Tok.FunctionsSection),
|
|
||||||
(compile("Clauses:"), Tok.ClausesSection),
|
|
||||||
(compile("!"), Tok.Negate),
|
|
||||||
(compile(r"\("), Tok.OpenP),
|
|
||||||
(compile(r"\)"), Tok.CloseP),
|
|
||||||
(compile(","), Tok.Comma),
|
|
||||||
(compile(r"\w+"), Tok.Identifier),
|
|
||||||
]
|
|
||||||
"""
|
|
||||||
A mapping of regexes to the tokens they identify
|
|
||||||
|
|
||||||
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
|
|
||||||
on the list also matches. All unicode strings should be matched by at least one token.
|
|
||||||
"""
|
|
||||||
|
|
||||||
class Variable(IntEnum):
|
class Variable(IntEnum):
|
||||||
Start = auto()
|
Start = auto()
|
||||||
|
@@ -179,27 +140,6 @@ class ASTProp
|
||||||
arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
|
arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
|
||||||
return map_res(p(IRProp, self.ident), arg_ir)
|
return map_res(p(IRProp, self.ident), arg_ir)
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class IRProp:
|
|
||||||
lexeme: Lexeme[Tok]
|
|
||||||
arguments: 'Sequence[IRTerm]'
|
|
||||||
def __str__(self) -> str:
|
|
||||||
return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class IRVar:
|
|
||||||
lexeme: Lexeme[Tok]
|
|
||||||
def __str__(self) -> str:
|
|
||||||
return f'*{self.lexeme.matched_string}'
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class IRNeg:
|
|
||||||
inner: 'IRTerm'
|
|
||||||
def __str__(self) -> str:
|
|
||||||
return f'¬{self.inner}'
|
|
||||||
|
|
||||||
IRTerm: TypeAlias = IRVar | IRProp | IRNeg
|
|
||||||
|
|
||||||
@cur2
|
@cur2
|
||||||
def make_ir(
|
def make_ir(
|
||||||
idents: IdentBindings,
|
idents: IdentBindings,
|
||||||
|
|
26
ir.py
Normal file
26
ir.py
Normal file
|
@@ -0,0 +1,26 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Sequence, TypeAlias
|
||||||
|
|
||||||
|
from lex import Lexeme
|
||||||
|
from tokens import Tok
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class IRProp:
|
||||||
|
lexeme: Lexeme[Tok]
|
||||||
|
arguments: 'Sequence[IRTerm]'
|
||||||
|
def __str__(self) -> str:
|
||||||
|
return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class IRVar:
|
||||||
|
lexeme: Lexeme[Tok]
|
||||||
|
def __str__(self) -> str:
|
||||||
|
return f'*{self.lexeme.matched_string}'
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class IRNeg:
|
||||||
|
inner: 'IRTerm'
|
||||||
|
def __str__(self) -> str:
|
||||||
|
return f'¬{self.inner}'
|
||||||
|
|
||||||
|
IRTerm: TypeAlias = IRVar | IRProp | IRNeg
|
45
tokens.py
Normal file
45
tokens.py
Normal file
|
@@ -0,0 +1,45 @@
|
||||||
|
from enum import auto, IntEnum
|
||||||
|
from typing import Collection, Tuple
|
||||||
|
from re import compile, Pattern
|
||||||
|
|
||||||
|
class Tok(IntEnum):
|
||||||
|
"""
|
||||||
|
All possible tokens used in the grammar
|
||||||
|
"""
|
||||||
|
Newline = auto()
|
||||||
|
Whitespace = auto()
|
||||||
|
PredicateSection = auto()
|
||||||
|
VariablesSection = auto()
|
||||||
|
ConstantsSection = auto()
|
||||||
|
FunctionsSection = auto()
|
||||||
|
ClausesSection = auto()
|
||||||
|
Negate = auto()
|
||||||
|
OpenP = auto()
|
||||||
|
CloseP = auto()
|
||||||
|
Comma = auto()
|
||||||
|
Identifier = auto()
|
||||||
|
Eof = auto()
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return self._name_
|
||||||
|
|
||||||
|
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
|
||||||
|
(compile(r"\n"), Tok.Newline),
|
||||||
|
(compile(r"[ \t]+"), Tok.Whitespace),
|
||||||
|
(compile("Predicates:"), Tok.PredicateSection),
|
||||||
|
(compile("Variables:"), Tok.VariablesSection),
|
||||||
|
(compile("Constants:"), Tok.ConstantsSection),
|
||||||
|
(compile("Functions:"), Tok.FunctionsSection),
|
||||||
|
(compile("Clauses:"), Tok.ClausesSection),
|
||||||
|
(compile("!"), Tok.Negate),
|
||||||
|
(compile(r"\("), Tok.OpenP),
|
||||||
|
(compile(r"\)"), Tok.CloseP),
|
||||||
|
(compile(","), Tok.Comma),
|
||||||
|
(compile(r"\w+"), Tok.Identifier),
|
||||||
|
]
|
||||||
|
"""
|
||||||
|
A mapping of regexes to the tokens they identify
|
||||||
|
|
||||||
|
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
|
||||||
|
on the list also matches. All unicode strings should be matched by at least one token.
|
||||||
|
"""
|
Loading…
Reference in a new issue