Move grammar and lex table to their own file
This commit is contained in:
parent 00043d27dd
commit f813d91736
@@ -330,8 +330,7 @@ def print_oracle_table_enum(

 if __name__ == '__main__':
     import doctest
-    from lex import Tok
-    from parse import GRAMMAR, Variable
+    from grammar import GRAMMAR, Tok, Variable
     failure_count, test_count = doctest.testmod()
     if failure_count:
         print('\n\nRefusing to build oracle table due to test failures')

@@ -1,8 +1,7 @@
 #!/bin/bash

 cat << EOF > oracle_table.py
-from lex import Tok
-from parse import Variable
+from grammar import Tok, Variable

 oracle_table = (
 EOF

139  grammar.py  Normal file
@@ -0,0 +1,139 @@
from enum import auto, IntEnum
from re import compile, Pattern

from typing import Collection, Mapping, Sequence, Tuple


class Tok(IntEnum):
    """
    All possible tokens used in the grammar
    """
    Newline = auto()
    Whitespace = auto()
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()
    Identifier = auto()
    Eof = auto()

    def __repr__(self):
        return self._name_

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(r"\n"), Tok.Newline),
    (compile(r"[ \t]+"), Tok.Whitespace),
    (compile("Predicates:"), Tok.PredicateSection),
    (compile("Variables:"), Tok.VariablesSection),
    (compile("Constants:"), Tok.ConstantsSection),
    (compile("Functions:"), Tok.FunctionsSection),
    (compile("Clauses:"), Tok.ClausesSection),
    (compile("!"), Tok.Negate),
    (compile(r"\("), Tok.OpenP),
    (compile(r"\)"), Tok.CloseP),
    (compile(","), Tok.Comma),
    (compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexes to the tokens they identify

Tokens earlier in the list should be regarded as higher priority, even if a pattern
later in the list also matches. All unicode strings should be matched by at least one token.
"""

class Variable(IntEnum):
    Start = auto()
    Idents = auto()
    Clauses = auto()
    Clauses_ = auto()
    Clause = auto()
    Clause_ = auto()
    Term = auto()
    Func = auto()
    CSTerms = auto()

    def __repr__(self) -> str:
        return f'<{self._name_}>'

GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
    (Variable.Start,
        [ Tok.PredicateSection, Variable.Idents, Tok.Newline
        , Tok.VariablesSection, Variable.Idents, Tok.Newline
        , Tok.ConstantsSection, Variable.Idents, Tok.Newline
        , Tok.FunctionsSection, Variable.Idents, Tok.Newline
        , Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ),

    (Variable.Idents,
        [ Tok.Identifier, Variable.Idents ]),
    (Variable.Idents,
        [ ]),

    (Variable.Clauses,
        [ Tok.Newline, Variable.Clauses_ ]),
    (Variable.Clauses,
        [ ]),

    (Variable.Clauses_,
        [ Variable.Clause, Variable.Clauses ]),
    (Variable.Clauses_,
        [ ]),

    (Variable.Clause,
        [ Variable.Term, Variable.Clause_ ]),

    (Variable.Clause_,
        [ Variable.Clause ]),
    (Variable.Clause_,
        [ ]),

    (Variable.Term,
        [ Tok.Negate, Variable.Term ]),
    (Variable.Term,
        [ Tok.Identifier, Variable.Func ]),

    (Variable.Func,
        [ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]),
    (Variable.Func,
        [ ]),

    (Variable.CSTerms,
        [ Tok.Comma, Variable.Term, Variable.CSTerms ]),
    (Variable.CSTerms,
        [ ]),
]
"""
Implements the following grammar:

Start    := PredicateSection <Idents> Newline
            VariablesSection <Idents> Newline
            ConstantsSection <Idents> Newline
            FunctionsSection <Idents> Newline
            ClausesSection <Clauses> Eof

Idents   := Identifier <Idents>
         := ε

Clauses  := Newline <Clauses'>
         := ε

Clauses' := <Clause> <Clauses>
         := ε

Clause   := <Term> <Clause'>

Clause'  := <Clause>
         := ε

Term     := Negate <Term>
         := Identifier <Func?>

Func?    := OpenP <Term> <CSTerms> CloseP
         := ε

CSTerms  := Comma <Term> <CSTerms>
         := ε
"""

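The priority rule described in the LEX_TABLE docstring (earlier entries win over later ones) is easy to exercise by hand. The sketch below is only an illustration written against the new module, not the project's tokenize from lex.py; the helper name lex_first_match is invented here.

from grammar import LEX_TABLE, Tok

def lex_first_match(text: str) -> list[Tok]:
    """Scan left to right, taking the first LEX_TABLE pattern that matches at each position."""
    tokens, pos = [], 0
    while pos < len(text):
        for pattern, tok in LEX_TABLE:
            match = pattern.match(text, pos)
            if match:
                tokens.append(tok)
                pos = match.end()
                break
        else:
            raise ValueError(f'no token matches at position {pos}')
    return tokens + [Tok.Eof]

# "Predicates:" also starts with a \w+ Identifier match, but the PredicateSection
# entry sits earlier in LEX_TABLE, so the section header wins:
assert lex_first_match('Predicates: p q\n') == [
    Tok.PredicateSection, Tok.Whitespace, Tok.Identifier,
    Tok.Whitespace, Tok.Identifier, Tok.Newline, Tok.Eof,
]
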
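Reading the GRAMMAR productions as data is easier with one concrete expansion. Assuming Whitespace tokens are dropped before parsing (no production mentions Tok.Whitespace), a clause line such as "!p q" in the Clauses section can be derived using only the rules listed above:

    <Clauses>  -> Newline <Clauses'>                    (the newline after "Clauses:")
    <Clauses'> -> <Clause> <Clauses>
    <Clause>   -> <Term> <Clause'>
    <Term>     -> Negate <Term> -> Negate Identifier <Func>,  with <Func> -> ε
    <Clause'>  -> <Clause> -> <Term> <Clause'> -> Identifier <Func> <Clause'> -> Identifier
    <Clauses>  -> Newline <Clauses'>,  with <Clauses'> -> ε   (the trailing newline)

which matches the token string: Newline Negate Identifier Identifier Newline.
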
46  lex.py
@@ -3,52 +3,10 @@ from emis_funky_funktions import *
 from dataclasses import dataclass
 from enum import auto, IntEnum
 from operator import is_not
-from re import compile, Pattern
+from re import Pattern

 from typing import Collection, Tuple, List, NewType

-class Tok(IntEnum):
-    """
-    All possible tokens used in the grammar
-    """
-    Newline = auto()
-    Whitespace = auto()
-    PredicateSection = auto()
-    VariablesSection = auto()
-    ConstantsSection = auto()
-    FunctionsSection = auto()
-    ClausesSection = auto()
-    Negate = auto()
-    OpenP = auto()
-    CloseP = auto()
-    Comma = auto()
-    Identifier = auto()
-    Eof = auto()
-
-    def __repr__(self):
-        return self._name_
-
-LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
-    (compile(r"\n"), Tok.Newline),
-    (compile(r"[ \t]+"), Tok.Whitespace),
-    (compile("Predicates:"), Tok.PredicateSection),
-    (compile("Variables:"), Tok.VariablesSection),
-    (compile("Constants:"), Tok.ConstantsSection),
-    (compile("Functions:"), Tok.FunctionsSection),
-    (compile("Clauses:"), Tok.ClausesSection),
-    (compile("!"), Tok.Negate),
-    (compile(r"\("), Tok.OpenP),
-    (compile(r"\)"), Tok.CloseP),
-    (compile(","), Tok.Comma),
-    (compile(r"\w+"), Tok.Identifier),
-]
-"""
-A mapping of regexes to the tokens they identify
-
-Tokens earlier in the list should be regarded as higher priority, even if a pattern
-later in the list also matches. All unicode strings should be matched by at least one token.
-"""
-
 def try_lex1(regex: Pattern[str], tok: A, input: str) -> Option[Tuple[Tuple[A, str], str]]:
     """
     Attempt to recognize a single token against a full input string

@@ -112,4 +70,6 @@ def tokenize(lex_table: Collection[Tuple[Pattern[str], A]], drop_tokens: Collect
 if __name__ == '__main__':
     # print(tokenize(open('sample.cnf').read()))
     import doctest
+    from re import compile
+    from grammar import Tok, LEX_TABLE
     doctest.testmod()

144  parse.py
@@ -1,144 +0,0 @@
from emis_funky_funktions import *

from enum import auto, IntEnum
from functools import cache, reduce
from operator import getitem
from typing import Any, cast, Collection, Mapping, Sequence, Set, Tuple, TypeGuard

from lex import Tok

"""
Implements a parser for the following grammar:

Start    := PredicateSection <Idents> Newline
            VariablesSection <Idents> Newline
            ConstantsSection <Idents> Newline
            FunctionsSection <Idents> Newline
            ClausesSection <Clauses> Eof

Idents   := Identifier <Idents>
         := ε

Clauses  := Newline <Clauses'>
         := ε

Clauses' := <Clause> <Clauses>
         := ε

Clause   := <Term> <Clause'>

Clause'  := <Clause>
         := ε

Term     := Negate <Term>
         := Identifier <Func?>

Func?    := OpenP <Term> <CSTerms> CloseP
         := ε

CSTerms  := Comma <Term> <CSTerms>
         := ε
"""

class Variable(IntEnum):
    Start = auto()
    Idents = auto()
    Clauses = auto()
    Clauses_ = auto()
    Clause = auto()
    Clause_ = auto()
    Term = auto()
    Func = auto()
    CSTerms = auto()

    def __repr__(self) -> str:
        return f'<{self._name_}>'

GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
    (Variable.Start,
        [ Tok.PredicateSection, Variable.Idents, Tok.Newline
        , Tok.VariablesSection, Variable.Idents, Tok.Newline
        , Tok.ConstantsSection, Variable.Idents, Tok.Newline
        , Tok.FunctionsSection, Variable.Idents, Tok.Newline
        , Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ),

    (Variable.Idents,
        [ Tok.Identifier, Variable.Idents ]),
    (Variable.Idents,
        [ ]),

    (Variable.Clauses,
        [ Tok.Newline, Variable.Clauses_ ]),
    (Variable.Clauses,
        [ ]),

    (Variable.Clauses_,
        [ Variable.Clause, Variable.Clauses ]),
    (Variable.Clauses_,
        [ ]),

    (Variable.Clause,
        [ Variable.Term, Variable.Clause_ ]),

    (Variable.Clause_,
        [ Variable.Clause ]),
    (Variable.Clause_,
        [ ]),

    (Variable.Term,
        [ Tok.Negate, Variable.Term ]),
    (Variable.Term,
        [ Tok.Identifier, Variable.Func ]),

    (Variable.Func,
        [ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]),
    (Variable.Func,
        [ ]),

    (Variable.CSTerms,
        [ Tok.Comma, Variable.Term, Variable.CSTerms ]),
    (Variable.CSTerms,
        [ ]),
]


# ### FIRST Table ###
#
# Start    : PredicateSection
# Idents   : Identifier, ε
# Clauses  : Newline, ε
# Clauses' : Negate, Identifier, ε
# Clause   : Negate, Identifier
# Clause'  : Negate, Identifier, ε
# Term     : Negate, Identifier
# Func?    : OpenP
# CSTerms  : Comma, ε
#
#
#
# ### FOLLOW Table ###
#
# Idents   : Newline
# Clauses  : Eof
# Clauses' : Eof
# Clause   : Newline, Eof
# Clause'  : Newline, Eof
# Term     : Negate, Identifier, Newline, Eof, Comma
# Func?    : Negate, Identifier, Newline, Eof, Comma
# CSTerms  : CloseP
#
#
#
# ### PREDICT Table ###
#
# Idents   : Identifier
#          : Newline
# Clauses  : Newline
#          : Eof
# Clauses' : Negate, Identifier
#          : Eof
# Clause   : Newline, Eof
# Clause'  : Newline, Eof
# Term     : Negate, Identifier, Newline, Eof, Comma
# Func?    : Negate, Identifier, Newline, Eof, Comma
# CSTerms  : CloseP
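The FIRST/FOLLOW/PREDICT notes above are the raw material for the generated oracle table. As a rough sketch of how such a table can drive a table-driven LL(1) parse (purely illustrative: the shape of the real oracle_table is decided by the build script, and PredictTable and ll1_accepts below are names invented here):

from typing import Mapping, Sequence, Tuple

from grammar import GRAMMAR, Tok, Variable

# Assumed shape for illustration: (nonterminal, lookahead token) -> index into GRAMMAR
PredictTable = Mapping[Tuple[Variable, Tok], int]

def ll1_accepts(predict_table: PredictTable, tokens: Sequence[Tok]) -> bool:
    """Table-driven LL(1) recognition over a token stream that ends with Tok.Eof."""
    stack: list[Variable | Tok] = [Variable.Start]
    pos = 0
    while stack:
        expected = stack.pop()
        if isinstance(expected, Tok):
            # Terminal: must match the current input token exactly
            if pos >= len(tokens) or tokens[pos] != expected:
                return False
            pos += 1
        else:
            # Nonterminal: ask the predict table which production to expand
            lookahead = tokens[pos] if pos < len(tokens) else Tok.Eof
            production_index = predict_table.get((expected, lookahead))
            if production_index is None:
                return False
            _, expansion = GRAMMAR[production_index]
            stack.extend(reversed(expansion))  # leftmost symbol ends up on top of the stack
    return pos == len(tokens)

For instance, an entry {(Variable.Idents, Tok.Identifier): 1} would select GRAMMAR[1], i.e. the production Idents -> Identifier <Idents>, whenever an Idents is expected and the lookahead is an Identifier.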