Factor out IR and Tok
parent 9e6c7d504d
commit f84a340f0a

grammar.py (64 lines changed)
@@ -12,52 +12,13 @@ from dataclasses import dataclass
from enum import auto, IntEnum
from re import compile, Pattern

from ir import IRNeg, IRProp, IRTerm, IRVar
from lex import Lexeme
from parse import Action
from tokens import *

from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias

class Tok(IntEnum):
    """
    All possible tokens used in the grammar
    """
    Newline = auto()
    Whitespace = auto()
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()
    Identifier = auto()
    Eof = auto()

    def __repr__(self):
        return self._name_

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(r"\n"), Tok.Newline),
    (compile(r"[ \t]+"), Tok.Whitespace),
    (compile("Predicates:"), Tok.PredicateSection),
    (compile("Variables:"), Tok.VariablesSection),
    (compile("Constants:"), Tok.ConstantsSection),
    (compile("Functions:"), Tok.FunctionsSection),
    (compile("Clauses:"), Tok.ClausesSection),
    (compile("!"), Tok.Negate),
    (compile(r"\("), Tok.OpenP),
    (compile(r"\)"), Tok.CloseP),
    (compile(","), Tok.Comma),
    (compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexes to the tokens they identify

Tokens earlier in the list should be regarded as higher priority, even if a pattern lower
on the list also matches. All unicode strings should be matched by at least one token.
"""

class Variable(IntEnum):
    Start = auto()

@@ -179,27 +140,6 @@ class ASTProp:
        arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
        return map_res(p(IRProp, self.ident), arg_ir)

@dataclass(frozen=True)
class IRProp:
    lexeme: Lexeme[Tok]
    arguments: 'Sequence[IRTerm]'
    def __str__(self) -> str:
        return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'

@dataclass(frozen=True)
class IRVar:
    lexeme: Lexeme[Tok]
    def __str__(self) -> str:
        return f'*{self.lexeme.matched_string}'

@dataclass(frozen=True)
class IRNeg:
    inner: 'IRTerm'
    def __str__(self) -> str:
        return f'¬{self.inner}'

IRTerm: TypeAlias = IRVar | IRProp | IRNeg

@cur2
def make_ir(
    idents: IdentBindings,

ir.py (new file, 26 lines)
@@ -0,0 +1,26 @@
from dataclasses import dataclass
from typing import Sequence, TypeAlias

from lex import Lexeme
from tokens import Tok

@dataclass(frozen=True)
class IRProp:
    lexeme: Lexeme[Tok]
    arguments: 'Sequence[IRTerm]'
    def __str__(self) -> str:
        return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'

@dataclass(frozen=True)
class IRVar:
    lexeme: Lexeme[Tok]
    def __str__(self) -> str:
        return f'*{self.lexeme.matched_string}'

@dataclass(frozen=True)
class IRNeg:
    inner: 'IRTerm'
    def __str__(self) -> str:
        return f'¬{self.inner}'

IRTerm: TypeAlias = IRVar | IRProp | IRNeg
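
As a quick illustration of how the factored-out IR classes compose and render: a minimal sketch, using a stand-in for lex.Lexeme since Lexeme's constructor is not part of this commit — only the matched_string attribute that the __str__ methods read is assumed here.

from dataclasses import dataclass

from ir import IRNeg, IRProp, IRVar

# Stand-in for lex.Lexeme (hypothetical): only matched_string is assumed,
# since that is the one attribute the __str__ implementations above read.
@dataclass(frozen=True)
class FakeLexeme:
    matched_string: str

# Build ¬P(*x, c()) — a negated proposition applied to a variable and a
# zero-argument proposition.
term = IRNeg(IRProp(FakeLexeme("P"), (
    IRVar(FakeLexeme("x")),
    IRProp(FakeLexeme("c"), ()),
)))
print(term)  # ¬P(*x,c())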

tokens.py (new file, 45 lines)
@@ -0,0 +1,45 @@
from enum import auto, IntEnum
from typing import Collection, Tuple
from re import compile, Pattern

class Tok(IntEnum):
    """
    All possible tokens used in the grammar
    """
    Newline = auto()
    Whitespace = auto()
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()
    Identifier = auto()
    Eof = auto()

    def __repr__(self):
        return self._name_

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(r"\n"), Tok.Newline),
    (compile(r"[ \t]+"), Tok.Whitespace),
    (compile("Predicates:"), Tok.PredicateSection),
    (compile("Variables:"), Tok.VariablesSection),
    (compile("Constants:"), Tok.ConstantsSection),
    (compile("Functions:"), Tok.FunctionsSection),
    (compile("Clauses:"), Tok.ClausesSection),
    (compile("!"), Tok.Negate),
    (compile(r"\("), Tok.OpenP),
    (compile(r"\)"), Tok.CloseP),
    (compile(","), Tok.Comma),
    (compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexes to the tokens they identify

Tokens earlier in the list should be regarded as higher priority, even if a pattern lower
on the list also matches. All unicode strings should be matched by at least one token.
"""
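
For context, a minimal sketch of a loop that honors LEX_TABLE's documented priority contract: the first table entry that matches at the current position wins, even when a later entry would also match. This is illustrative only — the project's actual lexer lives in lex.py, which is not shown in this commit; note it also explains why Tok.Eof has no table entry, since the lexer can append it after consuming the input.

from tokens import LEX_TABLE, Tok

def tokenize(src: str) -> list[tuple[Tok, str]]:
    out: list[tuple[Tok, str]] = []
    pos = 0
    while pos < len(src):
        # Scan the table in order; the first pattern to match wins.
        for pattern, tok in LEX_TABLE:
            m = pattern.match(src, pos)
            if m:
                out.append((tok, m.group()))
                pos = m.end()
                break
        else:
            # The docstring promises every string is matched by some token.
            raise ValueError(f"unlexable input at position {pos}")
    out.append((Tok.Eof, ""))
    return out

tokenize("Clauses:\n!P(x, y)")
# [(ClausesSection, 'Clauses:'), (Newline, '\n'), (Negate, '!'),
#  (Identifier, 'P'), (OpenP, '('), (Identifier, 'x'), (Comma, ','),
#  (Whitespace, ' '), (Identifier, 'y'), (CloseP, ')'), (Eof, '')]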