Factor out IR and Tok

This commit is contained in:
Emi Simpson 2023-03-05 16:44:30 -05:00
parent 9e6c7d504d
commit f84a340f0a
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847
3 changed files with 73 additions and 62 deletions

View file

@ -12,52 +12,13 @@ from dataclasses import dataclass
from enum import auto, IntEnum from enum import auto, IntEnum
from re import compile, Pattern from re import compile, Pattern
from ir import IRNeg, IRProp, IRTerm, IRVar
from lex import Lexeme from lex import Lexeme
from parse import Action from parse import Action
from tokens import *
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
class Tok(IntEnum):
"""
All possible tokens used in the grammar
"""
Newline = auto()
Whitespace = auto()
PredicateSection = auto()
VariablesSection = auto()
ConstantsSection = auto()
FunctionsSection = auto()
ClausesSection = auto()
Negate = auto()
OpenP = auto()
CloseP = auto()
Comma = auto()
Identifier = auto()
Eof = auto()
def __repr__(self):
return self._name_
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
(compile(r"\n"), Tok.Newline),
(compile(r"[ \t]+"), Tok.Whitespace),
(compile("Predicates:"), Tok.PredicateSection),
(compile("Variables:"), Tok.VariablesSection),
(compile("Constants:"), Tok.ConstantsSection),
(compile("Functions:"), Tok.FunctionsSection),
(compile("Clauses:"), Tok.ClausesSection),
(compile("!"), Tok.Negate),
(compile(r"\("), Tok.OpenP),
(compile(r"\)"), Tok.CloseP),
(compile(","), Tok.Comma),
(compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexs to the tokens the identify
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
on the list also matches. All unicode strings should be matched by at least one token.
"""
class Variable(IntEnum): class Variable(IntEnum):
Start = auto() Start = auto()
@ -179,27 +140,6 @@ class ASTProp:
arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments]) arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
return map_res(p(IRProp, self.ident), arg_ir) return map_res(p(IRProp, self.ident), arg_ir)
@dataclass(frozen=True)
class IRProp:
lexeme: Lexeme[Tok]
arguments: 'Sequence[IRTerm]'
def __str__(self) -> str:
return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'
@dataclass(frozen=True)
class IRVar:
lexeme: Lexeme[Tok]
def __str__(self) -> str:
return f'*{self.lexeme.matched_string}'
@dataclass(frozen=True)
class IRNeg:
inner: 'IRTerm'
def __str__(self) -> str:
return f'¬{self.inner}'
IRTerm: TypeAlias = IRVar | IRProp | IRNeg
@cur2 @cur2
def make_ir( def make_ir(
idents: IdentBindings, idents: IdentBindings,

26
ir.py Normal file
View file

@ -0,0 +1,26 @@
from dataclasses import dataclass
from typing import Sequence, TypeAlias
from lex import Lexeme
from tokens import Tok
@dataclass(frozen=True)
class IRProp:
    """
    A proposition (or function application) node of the intermediate representation

    Renders as `name(arg1,arg2,...)` — no space after the commas.
    """
    lexeme: Lexeme[Tok]            # identifier token naming the proposition
    arguments: 'Sequence[IRTerm]'  # zero or more argument terms

    def __str__(self) -> str:
        rendered_arguments = ",".join(str(argument) for argument in self.arguments)
        return f'{self.lexeme.matched_string}({rendered_arguments})'
@dataclass(frozen=True)
class IRVar:
    """
    A variable node of the intermediate representation, rendered as `*name`
    """
    lexeme: Lexeme[Tok]  # identifier token naming the variable

    def __str__(self) -> str:
        return '*' + self.lexeme.matched_string
@dataclass(frozen=True)
class IRNeg:
    """
    Logical negation of a single inner term, rendered with a leading ¬
    """
    inner: 'IRTerm'  # the negated term (string forward reference)

    def __str__(self) -> str:
        return '¬' + str(self.inner)
# Closed union of every IR node kind: a term is a variable, a
# proposition/function application, or a negation of another term.
IRTerm: TypeAlias = IRVar | IRProp | IRNeg

45
tokens.py Normal file
View file

@ -0,0 +1,45 @@
from enum import auto, IntEnum
from typing import Collection, Tuple
from re import compile, Pattern
class Tok(IntEnum):
    """
    Enumerates every kind of token the grammar's lexer can produce
    """
    Newline = auto()           # line break; significant for section layout
    Whitespace = auto()        # run of spaces/tabs between tokens
    PredicateSection = auto()  # "Predicates:" header
    VariablesSection = auto()  # "Variables:" header
    ConstantsSection = auto()  # "Constants:" header
    FunctionsSection = auto()  # "Functions:" header
    ClausesSection = auto()    # "Clauses:" header
    Negate = auto()            # "!"
    OpenP = auto()             # "("
    CloseP = auto()            # ")"
    Comma = auto()             # ","
    Identifier = auto()        # run of word characters (catch-all)
    Eof = auto()               # end-of-input marker (no lexer pattern)

    def __repr__(self) -> str:
        # Bare member name (e.g. `Identifier`) keeps token dumps compact;
        # `self.name` is the public accessor for the member's `_name_`.
        return self.name
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
(compile(r"\n"), Tok.Newline),
(compile(r"[ \t]+"), Tok.Whitespace),
(compile("Predicates:"), Tok.PredicateSection),
(compile("Variables:"), Tok.VariablesSection),
(compile("Constants:"), Tok.ConstantsSection),
(compile("Functions:"), Tok.FunctionsSection),
(compile("Clauses:"), Tok.ClausesSection),
(compile("!"), Tok.Negate),
(compile(r"\("), Tok.OpenP),
(compile(r"\)"), Tok.CloseP),
(compile(","), Tok.Comma),
(compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexs to the tokens the identify
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
on the list also matches. All unicode strings should be matched by at least one token.
"""