From f84a340f0aba4fb4111f395d0e3f296ed403b455 Mon Sep 17 00:00:00 2001
From: Emi Simpson
Date: Sun, 5 Mar 2023 16:44:30 -0500
Subject: [PATCH] Factor out IR and Tok

---
 grammar.py | 64 ++----------------------------------------------------
 ir.py      | 26 ++++++++++++++++++++++
 tokens.py  | 45 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 62 deletions(-)
 create mode 100644 ir.py
 create mode 100644 tokens.py

diff --git a/grammar.py b/grammar.py
index 015f9fe..9e4a13f 100644
--- a/grammar.py
+++ b/grammar.py
@@ -12,52 +12,13 @@
 from dataclasses import dataclass
 from enum import auto, IntEnum
 from re import compile, Pattern
+from ir import IRNeg, IRProp, IRTerm, IRVar
 from lex import Lexeme
 from parse import Action
+from tokens import *
 from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias
 
 
-class Tok(IntEnum):
-	"""
-	All possible tokens used in the grammar
-	"""
-	Newline = auto()
-	Whitespace = auto()
-	PredicateSection = auto()
-	VariablesSection = auto()
-	ConstantsSection = auto()
-	FunctionsSection = auto()
-	ClausesSection = auto()
-	Negate = auto()
-	OpenP = auto()
-	CloseP = auto()
-	Comma = auto()
-	Identifier = auto()
-	Eof = auto()
-
-	def __repr__(self):
-		return self._name_
-
-LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
-	(compile(r"\n"), Tok.Newline),
-	(compile(r"[ \t]+"), Tok.Whitespace),
-	(compile("Predicates:"), Tok.PredicateSection),
-	(compile("Variables:"), Tok.VariablesSection),
-	(compile("Constants:"), Tok.ConstantsSection),
-	(compile("Functions:"), Tok.FunctionsSection),
-	(compile("Clauses:"), Tok.ClausesSection),
-	(compile("!"), Tok.Negate),
-	(compile(r"\("), Tok.OpenP),
-	(compile(r"\)"), Tok.CloseP),
-	(compile(","), Tok.Comma),
-	(compile(r"\w+"), Tok.Identifier),
-]
-"""
-A mapping of regexs to the tokens the identify
-
-Tokens earlier on in the list should be regarded as higher priority, even if a match lower
-on the list also matches. All unicode strings should be matched by at least one token.
-"""
 
 class Variable(IntEnum):
 	Start = auto()
@@ -179,27 +140,6 @@ class ASTProp:
 		arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
 		return map_res(p(IRProp, self.ident), arg_ir)
 
-@dataclass(frozen=True)
-class IRProp:
-	lexeme: Lexeme[Tok]
-	arguments: 'Sequence[IRTerm]'
-	def __str__(self) -> str:
-		return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'
-
-@dataclass(frozen=True)
-class IRVar:
-	lexeme: Lexeme[Tok]
-	def __str__(self) -> str:
-		return f'*{self.lexeme.matched_string}'
-
-@dataclass(frozen=True)
-class IRNeg:
-	inner: 'IRTerm'
-	def __str__(self) -> str:
-		return f'¬{self.inner}'
-
-IRTerm: TypeAlias = IRVar | IRProp | IRNeg
-
 @cur2
 def make_ir(
 	idents: IdentBindings,
diff --git a/ir.py b/ir.py
new file mode 100644
index 0000000..ebb239e
--- /dev/null
+++ b/ir.py
@@ -0,0 +1,26 @@
+from dataclasses import dataclass
+from typing import Sequence, TypeAlias
+
+from lex import Lexeme
+from tokens import Tok
+
+@dataclass(frozen=True)
+class IRProp:
+	lexeme: Lexeme[Tok]
+	arguments: 'Sequence[IRTerm]'
+	def __str__(self) -> str:
+		return f'{self.lexeme.matched_string}({",".join(str(arg) for arg in self.arguments)})'
+
+@dataclass(frozen=True)
+class IRVar:
+	lexeme: Lexeme[Tok]
+	def __str__(self) -> str:
+		return f'*{self.lexeme.matched_string}'
+
+@dataclass(frozen=True)
+class IRNeg:
+	inner: 'IRTerm'
+	def __str__(self) -> str:
+		return f'¬{self.inner}'
+
+IRTerm: TypeAlias = IRVar | IRProp | IRNeg
\ No newline at end of file
diff --git a/tokens.py b/tokens.py
new file mode 100644
index 0000000..897501d
--- /dev/null
+++ b/tokens.py
@@ -0,0 +1,45 @@
+from enum import auto, IntEnum
+from typing import Collection, Tuple
+from re import compile, Pattern
+
+class Tok(IntEnum):
+	"""
+	All possible tokens used in the grammar
+	"""
+	Newline = auto()
+	Whitespace = auto()
+	PredicateSection = auto()
+	VariablesSection = auto()
+	ConstantsSection = auto()
+	FunctionsSection = auto()
+	ClausesSection = auto()
+	Negate = auto()
+	OpenP = auto()
+	CloseP = auto()
+	Comma = auto()
+	Identifier = auto()
+	Eof = auto()
+
+	def __repr__(self):
+		return self._name_
+
+LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
+	(compile(r"\n"), Tok.Newline),
+	(compile(r"[ \t]+"), Tok.Whitespace),
+	(compile("Predicates:"), Tok.PredicateSection),
+	(compile("Variables:"), Tok.VariablesSection),
+	(compile("Constants:"), Tok.ConstantsSection),
+	(compile("Functions:"), Tok.FunctionsSection),
+	(compile("Clauses:"), Tok.ClausesSection),
+	(compile("!"), Tok.Negate),
+	(compile(r"\("), Tok.OpenP),
+	(compile(r"\)"), Tok.CloseP),
+	(compile(","), Tok.Comma),
+	(compile(r"\w+"), Tok.Identifier),
+]
+"""
+A mapping of regexes to the tokens they identify
+
+Tokens earlier on in the list should be regarded as higher priority, even if a match lower
+on the list also matches. All unicode strings should be matched by at least one token.
+"""
\ No newline at end of file