Factor out IR and Tok
This commit is contained in:
parent
9e6c7d504d
commit
f84a340f0a
64
grammar.py
64
grammar.py
|
@@ -12,52 +12,13 @@ from dataclasses import dataclass
|
||||||
from enum import auto, IntEnum
|
from enum import auto, IntEnum
|
||||||
from re import compile, Pattern
|
from re import compile, Pattern
|
||||||
|
|
||||||
|
from ir import IRNeg, IRProp, IRTerm, IRVar
|
||||||
from lex import Lexeme
|
from lex import Lexeme
|
||||||
from parse import Action
|
from parse import Action
|
||||||
|
from tokens import *
|
||||||
|
|
||||||
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
|
from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
|
||||||
|
|
||||||
class Tok(IntEnum):
|
|
||||||
"""
|
|
||||||
All possible tokens used in the grammar
|
|
||||||
"""
|
|
||||||
Newline = auto()
|
|
||||||
Whitespace = auto()
|
|
||||||
PredicateSection = auto()
|
|
||||||
VariablesSection = auto()
|
|
||||||
ConstantsSection = auto()
|
|
||||||
FunctionsSection = auto()
|
|
||||||
ClausesSection = auto()
|
|
||||||
Negate = auto()
|
|
||||||
OpenP = auto()
|
|
||||||
CloseP = auto()
|
|
||||||
Comma = auto()
|
|
||||||
Identifier = auto()
|
|
||||||
Eof = auto()
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return self._name_
|
|
||||||
|
|
||||||
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
|
|
||||||
(compile(r"\n"), Tok.Newline),
|
|
||||||
(compile(r"[ \t]+"), Tok.Whitespace),
|
|
||||||
(compile("Predicates:"), Tok.PredicateSection),
|
|
||||||
(compile("Variables:"), Tok.VariablesSection),
|
|
||||||
(compile("Constants:"), Tok.ConstantsSection),
|
|
||||||
(compile("Functions:"), Tok.FunctionsSection),
|
|
||||||
(compile("Clauses:"), Tok.ClausesSection),
|
|
||||||
(compile("!"), Tok.Negate),
|
|
||||||
(compile(r"\("), Tok.OpenP),
|
|
||||||
(compile(r"\)"), Tok.CloseP),
|
|
||||||
(compile(","), Tok.Comma),
|
|
||||||
(compile(r"\w+"), Tok.Identifier),
|
|
||||||
]
|
|
||||||
"""
|
|
||||||
A mapping of regexes to the tokens they identify
|
|
||||||
|
|
||||||
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
|
|
||||||
on the list also matches. All unicode strings should be matched by at least one token.
|
|
||||||
"""
|
|
||||||
|
|
||||||
class Variable(IntEnum):
|
class Variable(IntEnum):
|
||||||
Start = auto()
|
Start = auto()
|
||||||
|
@@ -179,27 +140,6 @@ class ASTProp
|
||||||
arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
|
arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
|
||||||
return map_res(p(IRProp, self.ident), arg_ir)
|
return map_res(p(IRProp, self.ident), arg_ir)
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class IRProp:
|
|
||||||
lexeme: Lexeme[Tok]
|
|
||||||
arguments: 'Sequence[IRTerm]'
|
|
||||||
def __str__(self) -> str:
|
|
||||||
return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class IRVar:
|
|
||||||
lexeme: Lexeme[Tok]
|
|
||||||
def __str__(self) -> str:
|
|
||||||
return f'*{self.lexeme.matched_string}'
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class IRNeg:
|
|
||||||
inner: 'IRTerm'
|
|
||||||
def __str__(self) -> str:
|
|
||||||
return f'¬{self.inner}'
|
|
||||||
|
|
||||||
IRTerm: TypeAlias = IRVar | IRProp | IRNeg
|
|
||||||
|
|
||||||
@cur2
|
@cur2
|
||||||
def make_ir(
|
def make_ir(
|
||||||
idents: IdentBindings,
|
idents: IdentBindings,
|
||||||
|
|
26
ir.py
Normal file
26
ir.py
Normal file
|
@@ -0,0 +1,26 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Sequence, TypeAlias
|
||||||
|
|
||||||
|
from lex import Lexeme
|
||||||
|
from tokens import Tok
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class IRProp:
|
||||||
|
lexeme: Lexeme[Tok]
|
||||||
|
arguments: 'Sequence[IRTerm]'
|
||||||
|
def __str__(self) -> str:
|
||||||
|
return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class IRVar:
|
||||||
|
lexeme: Lexeme[Tok]
|
||||||
|
def __str__(self) -> str:
|
||||||
|
return f'*{self.lexeme.matched_string}'
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class IRNeg:
|
||||||
|
inner: 'IRTerm'
|
||||||
|
def __str__(self) -> str:
|
||||||
|
return f'¬{self.inner}'
|
||||||
|
|
||||||
|
IRTerm: TypeAlias = IRVar | IRProp | IRNeg
|
45
tokens.py
Normal file
45
tokens.py
Normal file
|
@@ -0,0 +1,45 @@
|
||||||
|
from enum import auto, IntEnum
|
||||||
|
from typing import Collection, Tuple
|
||||||
|
from re import compile, Pattern
|
||||||
|
|
||||||
|
class Tok(IntEnum):
|
||||||
|
"""
|
||||||
|
All possible tokens used in the grammar
|
||||||
|
"""
|
||||||
|
Newline = auto()
|
||||||
|
Whitespace = auto()
|
||||||
|
PredicateSection = auto()
|
||||||
|
VariablesSection = auto()
|
||||||
|
ConstantsSection = auto()
|
||||||
|
FunctionsSection = auto()
|
||||||
|
ClausesSection = auto()
|
||||||
|
Negate = auto()
|
||||||
|
OpenP = auto()
|
||||||
|
CloseP = auto()
|
||||||
|
Comma = auto()
|
||||||
|
Identifier = auto()
|
||||||
|
Eof = auto()
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return self._name_
|
||||||
|
|
||||||
|
LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
|
||||||
|
(compile(r"\n"), Tok.Newline),
|
||||||
|
(compile(r"[ \t]+"), Tok.Whitespace),
|
||||||
|
(compile("Predicates:"), Tok.PredicateSection),
|
||||||
|
(compile("Variables:"), Tok.VariablesSection),
|
||||||
|
(compile("Constants:"), Tok.ConstantsSection),
|
||||||
|
(compile("Functions:"), Tok.FunctionsSection),
|
||||||
|
(compile("Clauses:"), Tok.ClausesSection),
|
||||||
|
(compile("!"), Tok.Negate),
|
||||||
|
(compile(r"\("), Tok.OpenP),
|
||||||
|
(compile(r"\)"), Tok.CloseP),
|
||||||
|
(compile(","), Tok.Comma),
|
||||||
|
(compile(r"\w+"), Tok.Identifier),
|
||||||
|
]
|
||||||
|
"""
|
||||||
|
A mapping of regexes to the tokens they identify
|
||||||
|
|
||||||
|
Tokens earlier on in the list should be regarded as higher priority, even if a match lower
|
||||||
|
on the list also matches. All unicode strings should be matched by at least one token.
|
||||||
|
"""
|
Loading…
Reference in a new issue