Move grammar and lex table to their own file
parent 00043d27dd
commit f813d91736
@@ -330,8 +330,7 @@ def print_oracle_table_enum(
if __name__ == '__main__':
    import doctest
-    from lex import Tok
-    from parse import GRAMMAR, Variable
+    from grammar import GRAMMAR, Tok, Variable

    failure_count, test_count = doctest.testmod()
    if failure_count:
        print('\n\nRefusing to build oracle table due to test failures')
@@ -1,8 +1,7 @@
#!/bin/bash

cat << EOF > oracle_table.py
-from lex import Tok
-from parse import Variable
+from grammar import Tok, Variable

oracle_table = (
EOF
grammar.py (new file, 139 lines)

@@ -0,0 +1,139 @@
from enum import auto, IntEnum
from re import compile, Pattern

from typing import Collection, Mapping, Sequence, Tuple


class Tok(IntEnum):
    """
    All possible tokens used in the grammar
    """
    Newline = auto()
    Whitespace = auto()
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()
    Identifier = auto()
    Eof = auto()

    def __repr__(self):
        return self._name_


LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(r"\n"), Tok.Newline),
    (compile(r"[ \t]+"), Tok.Whitespace),
    (compile("Predicates:"), Tok.PredicateSection),
    (compile("Variables:"), Tok.VariablesSection),
    (compile("Constants:"), Tok.ConstantsSection),
    (compile("Functions:"), Tok.FunctionsSection),
    (compile("Clauses:"), Tok.ClausesSection),
    (compile("!"), Tok.Negate),
    (compile(r"\("), Tok.OpenP),
    (compile(r"\)"), Tok.CloseP),
    (compile(","), Tok.Comma),
    (compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexes to the tokens they identify

Tokens earlier in the list should be regarded as higher priority, even if an entry lower
in the list also matches. All unicode strings should be matched by at least one token.
"""


class Variable(IntEnum):
    Start = auto()
    Idents = auto()
    Clauses = auto()
    Clauses_ = auto()
    Clause = auto()
    Clause_ = auto()
    Term = auto()
    Func = auto()
    CSTerms = auto()

    def __repr__(self) -> str:
        return f'<{self._name_}>'


GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
    (Variable.Start,
        [ Tok.PredicateSection, Variable.Idents, Tok.Newline
        , Tok.VariablesSection, Variable.Idents, Tok.Newline
        , Tok.ConstantsSection, Variable.Idents, Tok.Newline
        , Tok.FunctionsSection, Variable.Idents, Tok.Newline
        , Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ),

    (Variable.Idents,
        [ Tok.Identifier, Variable.Idents ]),
    (Variable.Idents,
        [ ]),

    (Variable.Clauses,
        [ Tok.Newline, Variable.Clauses_ ]),
    (Variable.Clauses,
        [ ]),

    (Variable.Clauses_,
        [ Variable.Clause, Variable.Clauses ]),
    (Variable.Clauses_,
        [ ]),

    (Variable.Clause,
        [ Variable.Term, Variable.Clause_ ]),

    (Variable.Clause_,
        [ Variable.Clause ]),
    (Variable.Clause_,
        [ ]),

    (Variable.Term,
        [ Tok.Negate, Variable.Term ]),
    (Variable.Term,
        [ Tok.Identifier, Variable.Func ]),

    (Variable.Func,
        [ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]),
    (Variable.Func,
        [ ]),

    (Variable.CSTerms,
        [ Tok.Comma, Variable.Term, Variable.CSTerms ]),
    (Variable.CSTerms,
        [ ]),
]
"""
Implements the following grammar:

Start    := PredicateSection <Idents> Newline
            VariablesSection <Idents> Newline
            ConstantsSection <Idents> Newline
            FunctionsSection <Idents> Newline
            ClausesSection <Clauses> Eof

Idents   := Identifier <Idents>
         := ε

Clauses  := Newline <Clauses'>
         := ε

Clauses' := <Clause> <Clauses>
         := ε

Clause   := <Term> <Clause'>

Clause'  := <Clause>
         := ε

Term     := Negate <Term>
         := Identifier <Func?>

Func?    := OpenP <Term> <CSTerms> CloseP
         := ε

CSTerms  := Comma <Term> <CSTerms>
         := ε
"""
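The docstring on LEX_TABLE fixes the matching policy: scan left to right and always take the earliest table entry that matches at the current position. Purely as an illustration of that policy (not part of this commit, and separate from the repository's own lexer in lex.py), here is a minimal first-match scanner over a table of this shape; it only assumes that grammar.py above is importable, and scan is a throwaway helper, not something defined in the repository.

# Illustrative sketch only -- not part of this commit.  It demonstrates the
# matching policy documented on LEX_TABLE (earlier entries win, every input
# character must be covered) and assumes grammar.py above is importable.
from re import Pattern
from typing import Collection, Iterator, Tuple

from grammar import LEX_TABLE, Tok

def scan(table: Collection[Tuple[Pattern[str], Tok]], text: str) -> Iterator[Tuple[Tok, str]]:
    pos = 0
    while pos < len(text):
        for pattern, tok in table:            # earlier entries take priority
            match = pattern.match(text, pos)
            if match and match.end() > pos:   # ignore zero-width matches
                yield tok, match.group(0)
                pos = match.end()
                break
        else:
            raise ValueError(f'no token matches at position {pos}')

print(list(scan(LEX_TABLE, 'Predicates: P Q\n')))
# [(PredicateSection, 'Predicates:'), (Whitespace, ' '), (Identifier, 'P'),
#  (Whitespace, ' '), (Identifier, 'Q'), (Newline, '\n')]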
lex.py (46 changed lines)

@@ -3,52 +3,10 @@ from emis_funky_funktions import *
from dataclasses import dataclass
from enum import auto, IntEnum
from operator import is_not
-from re import compile, Pattern
+from re import Pattern

from typing import Collection, Tuple, List, NewType

-class Tok(IntEnum):
-    """
-    All possible tokens used in the grammar
-    """
-    Newline = auto()
-    Whitespace = auto()
-    PredicateSection = auto()
-    VariablesSection = auto()
-    ConstantsSection = auto()
-    FunctionsSection = auto()
-    ClausesSection = auto()
-    Negate = auto()
-    OpenP = auto()
-    CloseP = auto()
-    Comma = auto()
-    Identifier = auto()
-    Eof = auto()
-
-    def __repr__(self):
-        return self._name_
-
-LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
-    (compile(r"\n"), Tok.Newline),
-    (compile(r"[ \t]+"), Tok.Whitespace),
-    (compile("Predicates:"), Tok.PredicateSection),
-    (compile("Variables:"), Tok.VariablesSection),
-    (compile("Constants:"), Tok.ConstantsSection),
-    (compile("Functions:"), Tok.FunctionsSection),
-    (compile("Clauses:"), Tok.ClausesSection),
-    (compile("!"), Tok.Negate),
-    (compile(r"\("), Tok.OpenP),
-    (compile(r"\)"), Tok.CloseP),
-    (compile(","), Tok.Comma),
-    (compile(r"\w+"), Tok.Identifier),
-]
-"""
-A mapping of regexes to the tokens they identify
-
-Tokens earlier in the list should be regarded as higher priority, even if an entry lower
-in the list also matches. All unicode strings should be matched by at least one token.
-"""
-
def try_lex1(regex: Pattern[str], tok: A, input: str) -> Option[Tuple[Tuple[A, str], str]]:
    """
    Attempt to recognize a single token against a full input string

@@ -112,4 +70,6 @@ def tokenize(lex_table: Collection[Tuple[Pattern[str], A]], drop_tokens: Collect
if __name__ == '__main__':
    # print(tokenize(open('sample.cnf').read()))
    import doctest
+    from re import compile
+    from grammar import Tok, LEX_TABLE
    doctest.testmod()
parse.py (deleted, 144 lines)

@@ -1,144 +0,0 @@
from emis_funky_funktions import *

from enum import auto, IntEnum
from functools import cache, reduce
from operator import getitem
from typing import Any, cast, Collection, Mapping, Sequence, Set, Tuple, TypeGuard

from lex import Tok

"""
Implements a parser for the following grammar:

Start    := PredicateSection <Idents> Newline
            VariablesSection <Idents> Newline
            ConstantsSection <Idents> Newline
            FunctionsSection <Idents> Newline
            ClausesSection <Clauses> Eof

Idents   := Identifier <Idents>
         := ε

Clauses  := Newline <Clauses'>
         := ε

Clauses' := <Clause> <Clauses>
         := ε

Clause   := <Term> <Clause'>

Clause'  := <Clause>
         := ε

Term     := Negate <Term>
         := Identifier <Func?>

Func?    := OpenP <Term> <CSTerms> CloseP
         := ε

CSTerms  := Comma <Term> <CSTerms>
         := ε
"""

class Variable(IntEnum):
    Start = auto()
    Idents = auto()
    Clauses = auto()
    Clauses_ = auto()
    Clause = auto()
    Clause_ = auto()
    Term = auto()
    Func = auto()
    CSTerms = auto()

    def __repr__(self) -> str:
        return f'<{self._name_}>'

GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok]]] = [
    (Variable.Start,
        [ Tok.PredicateSection, Variable.Idents, Tok.Newline
        , Tok.VariablesSection, Variable.Idents, Tok.Newline
        , Tok.ConstantsSection, Variable.Idents, Tok.Newline
        , Tok.FunctionsSection, Variable.Idents, Tok.Newline
        , Tok.ClausesSection, Variable.Clauses, Tok.Eof ] ),

    (Variable.Idents,
        [ Tok.Identifier, Variable.Idents ]),
    (Variable.Idents,
        [ ]),

    (Variable.Clauses,
        [ Tok.Newline, Variable.Clauses_ ]),
    (Variable.Clauses,
        [ ]),

    (Variable.Clauses_,
        [ Variable.Clause, Variable.Clauses ]),
    (Variable.Clauses_,
        [ ]),

    (Variable.Clause,
        [ Variable.Term, Variable.Clause_ ]),

    (Variable.Clause_,
        [ Variable.Clause ]),
    (Variable.Clause_,
        [ ]),

    (Variable.Term,
        [ Tok.Negate, Variable.Term ]),
    (Variable.Term,
        [ Tok.Identifier, Variable.Func ]),

    (Variable.Func,
        [ Tok.OpenP, Variable.CSTerms, Tok.CloseP ]),
    (Variable.Func,
        [ ]),

    (Variable.CSTerms,
        [ Tok.Comma, Variable.Term, Variable.CSTerms ]),
    (Variable.CSTerms,
        [ ]),
]


# ### FIRST Table ###
#
# Start    : PredicateSection
# Idents   : Identifier, ε
# Clauses  : Newline, ε
# Clauses' : Negate, Identifier, ε
# Clause   : Negate, Identifier
# Clause'  : Negate, Identifier, ε
# Term     : Negate, Identifier
# Func?    : OpenP
# CSTerms  : Comma, ε
#
#
#
# ### FOLLOW Table ###
#
# Idents   : Newline
# Clauses  : Eof
# Clauses' : Eof
# Clause   : Newline, Eof
# Clause'  : Newline, Eof
# Term     : Negate, Identifier, Newline, Eof, Comma
# Func?    : Negate, Identifier, Newline, Eof, Comma
# CSTerms  : CloseP
#
#
#
# ### PREDICT Table ###
#
# Idents   : Identifier
#          : Newline
# Clauses  : Newline
#          : Eof
# Clauses' : Negate, Identifier
#          : Eof
# Clause   : Newline, Eof
# Clause'  : Newline, Eof
# Term     : Negate, Identifier, Newline, Eof, Comma
# Func?    : Negate, Identifier, Newline, Eof, Comma
# CSTerms  : CloseP
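The FIRST, FOLLOW and PREDICT tables that parse.py carried as hand-maintained comments are presumably the kind of data the oracle_table build above is meant to derive from GRAMMAR. As a cross-check (again not part of this commit), a short fixed-point pass can recompute the FIRST sets directly from the production list; it only assumes that grammar.py introduced by this commit is importable, and first_sets is a hypothetical helper, not something defined in the repository.

# Illustrative sketch only -- not part of this commit.  Recomputes the FIRST
# sets written out by hand in the deleted parse.py, directly from GRAMMAR.
# Assumes grammar.py from this commit is importable; first_sets is a
# throwaway helper, not something defined in the repository.
from grammar import GRAMMAR, Tok, Variable

def first_sets() -> dict[Variable, set[Tok]]:
    nullable: set[Variable] = set()
    first: dict[Variable, set[Tok]] = {v: set() for v in Variable}
    changed = True
    while changed:                            # iterate to a fixed point
        changed = False
        for head, body in GRAMMAR:
            all_nullable = True
            for symbol in body:
                if isinstance(symbol, Tok):   # terminal: starts the production
                    if symbol not in first[head]:
                        first[head].add(symbol)
                        changed = True
                    all_nullable = False
                    break
                # non-terminal: fold in its FIRST set, continue only if nullable
                added = first[symbol] - first[head]
                if added:
                    first[head] |= added
                    changed = True
                if symbol not in nullable:
                    all_nullable = False
                    break
            if all_nullable and head not in nullable:
                nullable.add(head)            # an empty body is trivially nullable
                changed = True
    return first

if __name__ == '__main__':
    for variable, tokens in first_sets().items():
        print(f'{variable!r:10} : {sorted(tokens)}')
    # e.g. <Term> comes out as [Negate, Identifier], matching the old comment table.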