Start making a grammar then realize there's a builtin json parser
This commit is contained in:
parent bc2a2e2a8c
commit 206c0a0ec0

203 build_oracle.py
@ -1,203 +0,0 @@
"""
Tools for building an oracle table

See `grammar` and `build_oracle.sh` for scripts which actually produce python code. This
module only produces an oracle table in python, without outputting it.

NOTE! Doctests in this module use `GRAMMAR` from `grammar.py` and `EGRAMMAR` as a version
of that grammar with the actions erased via `_erase_actions()`.
"""
from emis_funky_funktions import *

from enum import auto, Enum, IntEnum
from functools import cache, reduce
from operator import getitem
from typing import Any, cast, Collection, Mapping, Sequence, Set, Tuple, TypeGuard, TypeVar

def _erase_actions_h(
    handle: Sequence[A | B | C],
    is_not_c: Callable[[A | B | C], TypeGuard[A | B]]
) -> Sequence[A | B]:
    """
    Produce an identical handle, but with all the actions removed
    """
    return [i for i in handle if is_not_c(i)]

def _erase_actions(
    grammar: Sequence[Tuple[A, Sequence[A | B | C]]],
    is_not_c: Callable[[A | B | C], TypeGuard[A | B]]
) -> Sequence[Tuple[A, Sequence[A | B]]]:
    """
    Produce an identical grammar, but with all the actions removed
    """
    return [
        (var, _erase_actions_h(handle, is_not_c))
        for (var, handle) in grammar
    ]

def _first(
    is_term: Callable[[A | B], TypeGuard[B]],
    grammar: Sequence[Tuple[A, Sequence[A | B]]],
    sequence: Sequence[A | B]
) -> Tuple[Collection[B], bool]:
    """
    Computes all of the possible starting terminals for a handle in a given grammar

    Due to pathetic python weaknesses, the first argument you must provide is a type guard
    to determine whether a certain thing is a terminal as opposed to a variable.

    Then, pass in the grammar and the sequence of terminals and variables in question.

    The output contains two values. The first is a set of possible terminals, and the
    second is a boolean indicating whether this term can derive epsilon.

    >>> _first(flip(cur2(isinstance))(Tok), EGRAMMAR, [Variable.Clause])
    ({Negate, Identifier}, False)

    >>> _first(flip(cur2(isinstance))(Tok), EGRAMMAR, [Variable.CSTerms])
    ({Comma}, True)

    >>> _first(flip(cur2(isinstance))(Tok), EGRAMMAR, [Variable.CSTerms, Tok.CloseP])
    ({CloseP, Comma}, False)
    """
    def inner(vs: Sequence[A | B]) -> Tuple[Set[B], bool]:
        match vs:
            case []:
                return (set(), True)
            case [v, *rest] if is_term(v):
                return ({v}, False)
            case [v, *rest]:
                this_variable_first, derives_epsilon = reduce(
                    lambda acc, result: (acc[0] | result[0], acc[1] or result[1]),
                    [
                        inner(handle)
                        for (other_variable, handle) in grammar
                        if other_variable == v
                    ]
                )
                if derives_epsilon:
                    rest_first, rest_derives_epsilon = inner(rest)
                    return (rest_first | this_variable_first, rest_derives_epsilon)
                else:
                    return (this_variable_first, False)
        raise Exception("UNREACHABLE")
    return inner(sequence)

def _follow(
    is_term: Callable[[A | B], TypeGuard[B]],
    grammar: Sequence[Tuple[A, Sequence[A | B]]],
) -> Mapping[A, Collection[B]]:
    """
    Produce a table indicating exactly which terminals can follow each variable

    >>> _follow(flip(cur2(isinstance))(Tok), EGRAMMAR) #doctest: +NORMALIZE_WHITESPACE
    {<Start>: set(),
     <Idents>: {Newline},
     <Clauses>: {Eof},
     <Clauses_>: {Eof},
     <Clause>: {Newline, Eof},
     <Clause_>: {Newline, Eof},
     <Term>: {Newline, Negate, CloseP, Comma, Identifier, Eof},
     <Func>: {Newline, Negate, CloseP, Comma, Identifier, Eof},
     <CSTerms>: {CloseP}}
    """
    follow_table: Mapping[A, Set[B]] = {
        variable: set()
        for (variable, _) in grammar
    }
    def following_tokens(handle: Sequence[A | B], follows_handle: Set[B]) -> Set[B]:
        handle_first, handle_derives_epsilon = _first(is_term, grammar, handle)
        return set(handle_first) | (follows_handle if handle_derives_epsilon else set())

    def inner(prev_table: Mapping[A, Set[B]]) -> Mapping[A, Set[B]]:
        new_table = reduce(
            lambda acc, entry: acc | {entry[0]: acc[entry[0]] | entry[1]},
            [
                (
                    cast(A, handle[i]),
                    following_tokens(handle[i+1:], prev_table[variable])
                )
                for (variable, handle) in grammar
                for i in range(len(handle))
                if not is_term(handle[i])
            ],
            prev_table
        )
        if new_table == prev_table:
            return new_table
        else:
            return inner(new_table)
    return inner(follow_table)

def _predict(
    is_term: Callable[[A | B], TypeGuard[B]],
    grammar: Sequence[Tuple[A, Sequence[A | B]]],
    follow: Mapping[A, Collection[B]],
    lhs: A,
    rhs: Sequence[A | B]
) -> Collection[B]:
    """
    Given a production, identify the terminals which this production would be valid under

    >>> is_tok = flip(cur2(isinstance))(Tok)
    >>> follow = _follow(is_tok, EGRAMMAR)
    >>> _predict(is_tok, EGRAMMAR, follow, Variable.Clause, [Variable.Term, Variable.Clause_])
    {Negate, Identifier}
    """
    first_rhs, epsilon_rhs = _first(is_term, grammar, rhs)
    if epsilon_rhs:
        return set(follow[lhs]) | set(first_rhs)
    else:
        return first_rhs

def oracle_table(
    is_term: Callable[[A | B], TypeGuard[B]],
    is_var: Callable[[A | B], TypeGuard[A]],
    grammar: Sequence[Tuple[A, Sequence[A | B]]],
) -> Mapping[A, Mapping[B, Collection[Sequence[A | B]]]]:
    """
    A variant of `_oracle` that generates a table immediately rather than lazily

    No significant performance benefit

    >>> is_tok = p_instance(Tok)
    >>> is_var = p_instance(Variable)
    >>> my_oracle_table = oracle_table(is_tok, is_var, EGRAMMAR)

    One valid expansion:
    >>> my_oracle_table[Variable.Clauses_][Tok.Negate]
    [[<Clause>, <Clauses>]]

    One valid expansion, but it expands to epsilon:
    >>> my_oracle_table[Variable.Clauses_][Tok.Eof]
    [[]]

    Zero valid expansions:
    >>> my_oracle_table[Variable.Term][Tok.Newline]
    []
    """
    all_variables = { lhs for (lhs, rhs) in grammar }
    all_terminals = { symbol for (lhs, rhs) in grammar for symbol in rhs if is_term(symbol) }

    is_not_c: Callable[[A | B | C], TypeGuard[A | B]] = lambda x: is_term(x) or is_var(x) #type:ignore
    e_grammar: Sequence[Tuple[A, Sequence[A | B]]] = _erase_actions(grammar, is_not_c) #type:ignore
    follow = _follow(is_term, e_grammar)

    return {
        v: {
            t: [
                handle
                for (lhs, handle) in grammar
                if lhs == v
                and t in _predict(is_term, e_grammar, follow, lhs, _erase_actions_h(handle, is_not_c)) #type:ignore
            ]
            for t in all_terminals
        }
        for v in all_variables
    }

if __name__ == '__main__':
    import doctest
    from grammar import GRAMMAR, Tok, Variable
    EGRAMMAR = _erase_actions(GRAMMAR, lambda x: not hasattr(x, '__call__')) #type: ignore
    doctest.testmod()

323 grammar.py
@ -1,300 +1,39 @@

"""
A grammar for parsing CNF files

If this module is run directly in python, it will spit out valid python which produces an
oracle table for the grammar it defines. It's recommended that this be done using
`build_oracle.sh` instead, however, which will build a whole python module containing the
oracle table, complete with imports.
"""
from emis_funky_funktions import *

from dataclasses import dataclass
from enum import auto, IntEnum
from typing import Collection, Tuple
from re import compile, Pattern
from typing import Any, Callable, cast, Collection, Mapping, Sequence, Tuple, TypeAlias

from build_oracle import oracle_table
from ir import IRNeg, IRProp, IRTerm, IRVar, KnowledgeBase
from lex import Lexeme, tokenize
from parse import Action, parser
from tokens import *

class Tok(IntEnum):
    """
    All possible tokens used in the grammar
    """
    Whitespace = auto()
    OpenCurly = auto()
    CloseCurly = auto()
    OpenSquare = auto()
    CloseSquare = auto()
    Comma = auto()
    Colon = auto()
    String = auto()
    Number = auto()
    Eof = auto()

    def __repr__(self):
        return self._name_

class Variable(IntEnum):
    Start = auto()
    Idents = auto()
    Clauses = auto()
    Clauses_ = auto()
    Clause = auto()
    Clause_ = auto()
    Term = auto()
    Func = auto()
    CSTerms = auto()

    def __repr__(self) -> str:
        return f'<{self._name_}>'

ASTTerm: TypeAlias = 'ASTNegated | ASTProp'

class IdentKind(IntEnum):
    Function = auto()
    Constant = auto()
    Variable = auto()
    Predicate = auto()

@dataclass(frozen=True)
class CallingNonFunc:
    term: Lexeme[Tok]
    obj_type: IdentKind
    def __str__(self):
        return f'Semantic error: Attempted to call {repr(self.term.matched_string)} (a {self.obj_type.name.lower()}) with arguments on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'

@dataclass(frozen=True)
class MissingArguments:
    term: Lexeme[Tok]
    def __str__(self):
        return f'Semantic error: The function {repr(self.term.matched_string)} on line {self.term.line}:{self.term.col_start}-{self.term.col_end} is missing arguments!'

@dataclass(frozen=True)
class UnidentifiedVariable:
    term: Lexeme[Tok]
    def __str__(self):
        return f'Semantic error: Unidentified identifier {repr(self.term.matched_string)} on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'

@dataclass(frozen=True)
class PropUsedInObjectContext:
    term: Lexeme[Tok]
    def __str__(self):
        return f'Semantic error: The proposition {repr(self.term.matched_string)} was used in a context where an object was expected on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'

@dataclass(frozen=True)
class ObjectUsedInPropContext:
    term: Lexeme[Tok]
    obj_type: IdentKind
    def __str__(self):
        return f'Semantic error: The {self.obj_type.name.lower()} {repr(self.term.matched_string)} was used in a context where a proposition was expected on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'

@dataclass(frozen=True)
class NegationOfObject:
    line: int
    col: int
    def __str__(self):
        return f'Semantic error: Attempted to use negation in a context where we are working on objects, on line {self.line}:{self.col}'

GenIrError: TypeAlias = CallingNonFunc | MissingArguments | UnidentifiedVariable | PropUsedInObjectContext | ObjectUsedInPropContext | NegationOfObject

@dataclass(frozen=True)
class IdentBindings:
    predicate_idents: Sequence[str]
    variable_idents: Sequence[str]
    const_idents: Sequence[str]
    func_idents: Sequence[str]

@dataclass(frozen=True)
class ASTNegated:
    neg_lexeme: Lexeme[Tok]
    term: ASTTerm

    def make_ir(self, idents: IdentBindings, is_prop: bool) -> 'Result[IRTerm, GenIrError]':
        if is_prop:
            return map_res(IRNeg, self.term.make_ir(idents, True))
        else:
            return Err(NegationOfObject(self.neg_lexeme.line, self.neg_lexeme.col_start))

@dataclass(frozen=True)
class ASTProp:
    ident: Lexeme[Tok]
    arguments: Sequence[ASTTerm]

    def make_ir(self, idents: IdentBindings, is_pred: bool) -> 'Result[IRTerm, GenIrError]':
        bound_type = (
            IdentKind.Predicate
            if self.ident.matched_string in idents.predicate_idents else
            IdentKind.Variable
            if self.ident.matched_string in idents.variable_idents else
            IdentKind.Constant
            if self.ident.matched_string in idents.const_idents else
            IdentKind.Function
            if self.ident.matched_string in idents.func_idents else
            None
        )
        if bound_type is None:
            return Err(UnidentifiedVariable(self.ident))

        if is_pred:
            if bound_type != IdentKind.Predicate:
                return Err(ObjectUsedInPropContext(self.ident, bound_type))
        else:
            if bound_type == IdentKind.Function:
                if not len(self.arguments):
                    return Err(MissingArguments(self.ident))
            elif bound_type == IdentKind.Predicate:
                return Err(PropUsedInObjectContext(self.ident))
            else:
                if len(self.arguments):
                    return Err(CallingNonFunc(self.ident, bound_type))

        if bound_type == IdentKind.Variable:
            return Ok(IRVar(self.ident.matched_string))
        else:
            return (sequence([t.make_ir(idents, False) for t in self.arguments])
                <= cast(Callable[[Iterable[IRTerm]], Tuple[IRTerm, ...]], tuple))\
                <= p(IRProp, self.ident.matched_string)

@cur2
def make_ir(
    idents: IdentBindings,
    clauses: Sequence[Sequence[ASTTerm]],
) -> Result[KnowledgeBase, GenIrError]:
    return map_res(
        lambda kb_: FSet(FSet(clause) for clause in kb_),
        sequence([sequence([term.make_ir(idents, True) for term in clause]) for clause in clauses])
    )
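
# The helpers below are parser "actions": functions from AST stack to AST stack which the
# grammar interleaves with its symbols (see `parse.parser`). A quick sketch of `cons` on a
# hypothetical stack:
#
#     cons(((2, 3), 1, 'rest'))  # -> ((1, 2, 3), 'rest')
#
# i.e. it pops a collected tail and a head, and pushes the tail with the head prepended.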

def cons(stack: Sequence[Any]) -> Sequence[Any]:
    match stack:
        case [rest, head, *popped_stack]:
            return ((head, *rest), *popped_stack)
        case bad_stack:
            raise Exception("Unexpected stack state!", bad_stack)

nil: Sequence[Any] = tuple()

@cur2
def introduce(
    cons: Any,
    stack: Sequence[Any]
) -> Sequence[Any]:
    return (cons, *stack)

def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
    match stack:
        case [arg, func, *popped_stack] if hasattr(func, '__call__'):
            return (func(arg), *popped_stack)
    raise Exception("Unexpected stack state!", stack)

@cur2
def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
    match stack:
        case [arg, *popped_stack]:
            return (func(arg), *popped_stack)
        case bad_stack:
            raise Exception("Unexpected stack state!", bad_stack)

def drop(stack: Sequence[Any]) -> Sequence[Any]:
    return stack[1:]

GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
    (Variable.Start,
     [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,IdentBindings)), Tok.Newline, drop
     , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
     , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
     , Tok.FunctionsSection, drop, Variable.Idents, f_apply, call_func(make_ir), Tok.Newline, drop
     , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),

    (Variable.Idents,
     [ Tok.Identifier, call_func(lambda i: i.matched_string), Variable.Idents, cons ]),
    (Variable.Idents,
     [ introduce(nil) ]),

    (Variable.Clauses,
     [ Tok.Newline, drop, Variable.Clauses_ ]),
    (Variable.Clauses,
     [ introduce(nil) ]),

    (Variable.Clauses_,
     [ Variable.Clause, Variable.Clauses, cons ]),
    (Variable.Clauses_,
     [ introduce(nil) ]),

    (Variable.Clause,
     [ Variable.Term, Variable.Clause_, cons ]),

    (Variable.Clause_,
     [ Variable.Clause ]),
    (Variable.Clause_,
     [ introduce(nil) ]),

    (Variable.Term,
     [ Tok.Negate, call_func(cur2(ASTNegated)), Variable.Term, f_apply ]),
    (Variable.Term,
     [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),

    (Variable.Func,
     [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
    (Variable.Func,
     [ introduce(nil) ]),

    (Variable.CSTerms,
     [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
    (Variable.CSTerms,
     [ introduce(nil) ]),
]

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(r"[\s\n]+"), Tok.Whitespace),
    (compile(r"{"), Tok.OpenCurly),
    (compile(r"}"), Tok.CloseCurly),
    (compile(r"\["), Tok.OpenSquare),
    (compile(r"\]"), Tok.CloseSquare),
    (compile(r","), Tok.Comma),
    (compile(r":"), Tok.Colon),
    (compile(r'"[^"]*"'), Tok.String),
    (compile(r'\d+'), Tok.Number),
]
"""
Implements the following grammar:
A mapping of regexs to the tokens they identify

Start    := PredicateSection <Idents> Newline
            VariablesSection <Idents> Newline
            ConstantsSection <Idents> Newline
            FunctionsSection <Idents> Newline
            ClausesSection <Clauses> Eof

Idents   := Identifier <Idents>
         := ε

Clauses  := Newline <Clauses'>
         := ε

Clauses' := <Clause> <Clauses>
         := ε

Clause   := <Term> <Clause'>

Clause'  := <Clause>
         := ε

Term     := Negate <Term>
         := Identifier <Func?>

Func?    := OpenP <Term> <CSTerms> CloseP
         := ε

CSTerms  := Comma <Term> <CSTerms>
         := ε
"""

def lex_and_parse(input: str) -> Result[Result[KnowledgeBase, GenIrError], Tuple[Lexeme[Tok], Collection[Tok]]]:
    lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, input))

    oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
    parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
    match parser_(lexemes):
        case Ok([Ok(ir)]):
            return Ok(Ok(ir))
        case Ok([Err(err)]):
            return Ok(Err(err))
        case Ok(huh):
            raise Exception('Unexpected end result: ', huh)
        case Err(e_tup):
            return Err(e_tup) #type:ignore
    raise Exception('Unreachable')

if __name__ == '__main__':
    # from emis_funky_funktions import cur2, flip
    # from build_oracle import print_oracle_table_enum, oracle_table
    # print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
    import sys

    if len(sys.argv) == 2:
        with open(sys.argv[1]) as file:
            match lex_and_parse(file.read()):
                case Ok(Ok(ir)):
                    print('\n'.join(' or '.join(str(t) for t in c) for c in ir))
                case Ok(Err(err)):
                    print(err)
                    exit(102)
                case Err((Lexeme(token, text, line, col_start, col_end), expected)):
                    print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
                    exit(101)
    else:
        print(f'Usage: python {sys.argv[0]} <cnf-file-to-parse>')
        exit(100)

Tokens earlier on in the list should be regarded as higher priority, even if a match lower
on the list also matches. All unicode strings should be matched by at least one token.
"""

281 ir.py
@ -1,281 +0,0 @@
from emis_funky_funktions import *

from dataclasses import dataclass
from functools import reduce
from typing import Collection, FrozenSet, Sequence, TypeAlias

@dataclass(frozen=True)
class Subst:
    """
    A substitution which may be performed on a term.

    Only variables may be substituted, but they may be substituted with any term. That
    is, `x1/Chris` is valid, but `Chris/x1` is not.

    It is strongly recommended to avoid recursive substitutions, that is, substitutions
    which contain the variable they are replacing
    """
    variable: str
    replacement: 'IRTerm'
    def __str__(self) -> str:
        return repr(self)
    def __repr__(self) -> str:
        return f'{self.replacement}/{self.variable}'
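
# A small sketch of the notation (hypothetical values): substituting the term
# `IRProp('Chris')` for the variable `x1` renders replacement-first, as in the
# docstring above.
#
#     Subst('x1', IRProp('Chris'))  # repr: Chris()/x1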

Substitutions: TypeAlias = Sequence[Subst]

@dataclass(frozen=True)
class UnificationMismatch:
    """
    Indicates that two terms failed to unify

    Contains the two terms, each of which is a valid subterm of one of the two original
    terms, and which do not superficially unify.
    """
    term1: 'IRTerm'
    term2: 'IRTerm'

@dataclass(frozen=True)
class LengthMismatch:
    """
    Indicates that two clauses/argument lists failed to unify due to a length mismatch

    Contains the first element of the two lists which didn't have a corresponding term on
    the other side
    """
    term: 'IRTerm'

UnificationError = UnificationMismatch | LengthMismatch

@dataclass(frozen=True)
class IRProp:
    """
    Represents a proposition or object for resolution

    Can have any number of arguments, each of which should be an `IRTerm`. Note that no
    distinction is made between predicates (n-arity, logical statements), functions
    (positive-arity functions over objects), and constants (stand-ins for objects)
    """

    name: str
    """
    The identifier of this thing, including its location in the source
    """

    arguments: 'Tuple[IRTerm, ...]' = tuple()

    def subst(self, subst: Subst) -> 'IRTerm':
        """
        Perform substitution on a proposition

        Returns the same proposition, but with any instances of the variable named in the
        substitution replaced with the contents of the substitution.

        >>> original = IRProp('angry', (IRVar('x1'),))
        >>> original
        angry(*x1)

        >>> original.subst(Subst('x1', IRProp('Alex')))
        angry(Alex())
        """
        return IRProp(self.name, tuple(arg.subst(subst) for arg in self.arguments))
    def __str__(self) -> str:
        return repr(self)
    def __repr__(self) -> str:
        return f'{self.name}({",".join(str(arg) for arg in self.arguments)})'
    def __contains__(self, var: str) -> bool:
        """
        Test if a variable with a given name exists in this term

        >>> 'x1' in IRProp('friends', [IRProp('John'), IRProp('mother_of', [IRVar('x1')])])
        True
        """
        return any(var in term for term in self.arguments)

@dataclass(frozen=True)
class IRVar:
    """
    A variable which may be substituted for any other term
    """
    name: str
    def subst(self, subst: Subst) -> 'IRTerm':
        """
        Perform substitution on a variable

        Returns the replacement if this variable is the one named in the substitution,
        and the variable itself otherwise.

        >>> IRVar('x1').subst(Subst('x1', IRProp('Alex')))
        Alex()

        >>> IRVar('x1').subst(Subst('x2', IRProp('Alex')))
        *x1
        """
        if self.name == subst.variable:
            return subst.replacement
        else:
            return self
    def __str__(self) -> str:
        return repr(self)
    def __repr__(self) -> str:
        return f'*{self.name}'
    def __contains__(self, var: str) -> bool:
        """
        Test if a variable with a given name exists in this term

        >>> 'x1' in IRVar('x1')
        True

        >>> 'x1' in IRVar('x2')
        False
        """
        return var == self.name

@dataclass(frozen=True)
class IRNeg:
    """
    A negated proposition
    """
    inner: 'IRTerm'
    def subst(self, subst: Subst) -> 'IRTerm':
        """
        Perform substitution on a negation

        Returns the same negation, but with any instances of the variable named in the
        substitution replaced with the contents of the substitution.

        >>> original = IRNeg(IRProp('happy', [IRVar('x1')]))
        >>> original
        ¬happy(*x1)

        >>> original.subst(Subst('x1', IRProp('parent', [IRProp('Susie')])))
        ¬happy(parent(Susie()))
        """
        return IRNeg(self.inner.subst(subst))
    def __str__(self) -> str:
        return repr(self)
    def __repr__(self) -> str:
        return f'¬{self.inner}'
    def __contains__(self, var: str) -> bool:
        """
        Test if a variable with a given name exists in this term

        >>> 'x1' in IRNeg(IRProp('round', [IRVar('x1')]))
        True
        """
        return var in self.inner

IRTerm: TypeAlias = IRVar | IRProp | IRNeg
Clause: TypeAlias = FrozenSet[IRTerm]
Clause_: TypeAlias = Collection[IRTerm]
"""
|
||||
A more general definition of `Clause` which uses a collection rather than a frozen set
|
||||
|
||||
Every `Clause` is a `Clause_`, but not vice versa. In other words, true `Clause` is a
|
||||
subclass of `Clause_`.
|
||||
|
||||
Due to this generalization, `Clause_` does not necessarily benefit from hashability or
|
||||
deduplication. It exists mostly to make inputing things easier, e.g. in doctests.
|
||||
"""

KnowledgeBase: TypeAlias = FrozenSet[Clause]
KnowledgeBase_: TypeAlias = Collection[Clause_]
"""
A more general version of `KnowledgeBase`

`KnowledgeBase_` : `KnowledgeBase` :: `Clause_` : `Clause`

A superclass of `KnowledgeBase`
"""

sub_all: Callable[[Substitutions, IRTerm], IRTerm] = p(reduce, lambda t, s: t.subst(s)) #type:ignore
"""
Perform a series of substitutions on a term

Applies every substitution to the term in order

>>> sub_all(
...     [Subst('x1', IRVar('x2')), Subst('x2', IRProp('Karkat'))],
...     IRProp('kismesis', [IRVar('x1'), IRVar('x2')]),
... )
kismesis(Karkat(), Karkat())
"""

def unify(t1: IRTerm, t2: IRTerm) -> Result[Substitutions, UnificationError]:
    """
    Attempt to find a substitution that unifies two terms

    If successful, the returned substitutions will cause both terms to be equal, when
    applied to both.

    If this method fails, then the pair of subterms which caused the unification to fail
    are returned.

    >>> unify(
    ...     IRProp('imaginary', [IRProp('Rufio')]),
    ...     IRProp('imaginary', [IRVar('x1')])
    ... )
    Ok((Rufio()/x1,))

    >>> unify(
    ...     IRProp('dating', [IRProp('Jade'), IRVar('x1')]),
    ...     IRProp('dating', [IRVar('x1'), IRProp('John')])
    ... )
    Err(UnificationMismatch(term1=Jade(), term2=John()))

    >>> unify(
    ...     IRProp('mother_of', [IRVar('x1')]),
    ...     IRVar('x1')
    ... )
    Err(UnificationMismatch(term1=mother_of(*x1), term2=*x1))
    """
    match (t1, t2):
        case (IRVar(v1), IRVar(v2)) if v1 == v2:
            return Ok(tuple())
        case (IRVar(v), t_other) | (t_other, IRVar(v)) if v not in t_other: #type: ignore
            return Ok((Subst(v, t_other),))
        case (IRProp(n1, a1), IRProp(n2, a2)) if n1 == n2 and len(a1) == len(a2):
            return unify_lists(a1, a2)
        case (IRNeg(i1), IRNeg(i2)):
            return unify(i1, i2)
    return Err(UnificationMismatch(t1, t2))

def unify_lists(c1: Sequence[IRTerm], c2: Sequence[IRTerm]) -> Result[Substitutions, UnificationError]:
    """
    Attempt to perform unification on two term/argument lists

    See `unify()` for the details of how this works. When working with lists, the same
    rules apply. The substitutions, when applied to every term of both lists, will
    cause the lists to become exactly the same.

    Lists which are not the same length cannot be unified, and will always fail.

    Notice the difference between a list of `IRTerm`s and a `Clause`: Namely, that a
    `Clause` is unordered, while a list of `IRTerm`s is ordered.

    >>> unify_lists(
    ...     [ IRProp('imaginary', [IRProp('Rufio')]), IRProp('friend', [IRVar('x1'), IRVar('x3')]) ],
    ...     [ IRProp('imaginary', [IRVar('x1')]), IRProp('friend', [IRVar('x2'), IRProp('Tavros')]) ]
    ... )
    Ok((Rufio()/x1, Rufio()/x2, Tavros()/x3))

    >>> unify_lists(
    ...     [ IRProp('imaginary', [IRProp('Rufio')]), IRProp('friend', [IRVar('x1'), IRVar('x3')]) ],
    ...     [ IRProp('imaginary', [IRVar('x1')]) ]
    ... )
    Err(LengthMismatch(term=friend(Rufio(),*x3)))
    """
    match (c1, c2):
        case ([], []):
            return Ok(tuple())
        case ([h1, *t1], [h2, *t2]):
            return unify(h1, h2) << (lambda subs:
                unify_lists((*map(p(sub_all,subs),t1),), (*map(p(sub_all,subs),t2),)) <= (
                    lambda final_subs: (*subs, *final_subs)))
        case ([h, *t], []) | ([], [h, *t]):
            return Err(LengthMismatch(h))
    raise Exception('Unreachable')

if __name__ == '__main__':
    import doctest
    doctest.testmod()

36 lab2.py
@ -1,36 +0,0 @@
from emis_funky_funktions import *

from typing import Sequence

from grammar import lex_and_parse
from lex import Lexeme
from resolution import derives_false

def main(args: Sequence[str]) -> Tuple[str, int]:
    match args[1:]:
        case [file]:
            with open(file) as input:
                match lex_and_parse(input.read()):
                    case Ok(Ok(ir_kb)):
                        return (
                            'no'
                            if derives_false(ir_kb) else
                            'yes',
                            0
                        )
                    case Ok(Err(gen_ir_err)):
                        return (str(gen_ir_err), 102)
                    case Err((Lexeme(token, text, line, col_start, col_end), expected)):
                        return (
                            f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}',
                            101
                        )
        case _:
            return (f'Usage: python {args[0]} some_file.cnf', 100)
    raise Exception('Unreachable')

if __name__ == '__main__':
    from sys import argv
    out, code = main(argv)
    print(out)
    exit(code)

141 parse.py
@ -1,141 +0,0 @@
from emis_funky_funktions import *

from dataclasses import dataclass
from functools import wraps
from operator import contains
from typing import Callable, Collection, Mapping, TypeGuard, TypeAlias

def _expected(row: Mapping[B, Collection[Sequence[Any]]]) -> Collection[B]:
    """
    Given a single row from an oracle table, identify the expected terminals
    """
    return [
        terminal
        for terminal, expansions in row.items()
        if len(expansions)
    ]
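
# A quick sketch with a hypothetical row: terminals mapping to zero expansions are not
# "expected" at this point in the parse.
#
#     _expected({Tok.Comma: [], Tok.CloseP: [[...]]})  # -> [Tok.CloseP]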

Action: TypeAlias = Callable[[Sequence[C | D]], Sequence[C | D]]
def parser(
    oracle: Mapping[A, Mapping[B, Collection[Sequence[A | B | Action]]]],
    identify_lexeme: Callable[[D], B],
    start_symbol: A,
) -> Callable[[Sequence[D]], Result[Sequence[C | D], Tuple[D, Collection[B]]]]:
    """
    Produces a parser based on a grammar, an oracle, and a start symbol.

    The `identify_lexeme` argument should be a function which converts a lexeme into the
    token that it represents. This allows for the actual lexemes that are being fed in to
    be more complex, and store additional data.

    The oracle table may include "action" annotations in its sequences. Actions should be
    an instance of `Action`, and should work on the AST stack. Every matched lexeme is
    pushed to the AST stack. An action may transform this stack by popping some number of
    items off of it, constructing some AST, pushing that AST back to the stack, and then
    returning the modified stack.

    A couple things to note about this process:
    - The stack that is passed to each action is immutable. "Modifications" should be
      made by simply constructing and returning a new stack.
    - The bottom of the stack is the zero index.

    If a parse is successful, the return value will be the AST stack at the end of the
    parse. It is up to the caller to verify that this is an expected result.

    If a parse fails, the return value will be a tuple containing the erroneous lexeme and
    a collection of expected tokens which failed to match it.

    ### Example:

    We generate a simple grammar:

    >>> class SimpleVariable(IntEnum):
    ...     S = auto()
    ...     Sum = auto()
    ...     Sum_ = auto()
    >>> class SimpleTerminal(IntEnum):
    ...     Number = auto()
    ...     Plus = auto()
    ...     Eof = auto()
    ...     def __repr__(self):
    ...         return self.name
    >>> build_S = lambda x: x[1:]
    >>> build_Sum = lambda x: (x[0](x[1][1]), *x[2:])
    >>> build_Sum_1 = lambda x: (lambda y: x[0] + y, *x[2:])
    >>> build_Sum_2 = lambda x: (lambda y: y, *x)
    >>> grammar = [
    ...     (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]),
    ...     (SimpleVariable.Sum, [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]),
    ...     (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum, build_Sum_1]),
    ...     (SimpleVariable.Sum_, [build_Sum_2]),
    ... ]
    >>> is_term = p_instance(SimpleTerminal)
    >>> is_var = p_instance(SimpleVariable)
    >>> my_oracle_table = oracle_table(is_term, is_var, grammar)
    >>> my_parser = parser(my_oracle_table, lambda x: x[0], SimpleVariable.S)

    >>> my_parser([
    ...     (SimpleTerminal.Number, 1),
    ...     (SimpleTerminal.Plus,),
    ...     (SimpleTerminal.Number, 3),
    ...     (SimpleTerminal.Plus,),
    ...     (SimpleTerminal.Number, 10),
    ...     (SimpleTerminal.Eof,),
    ... ])
    Ok((14,))

    >>> my_parser([
    ...     (SimpleTerminal.Number, 1),
    ...     (SimpleTerminal.Plus,),
    ...     (SimpleTerminal.Number, 3),
    ...     (SimpleTerminal.Number, 10), # <--- this is invalid!
    ...     (SimpleTerminal.Eof,),
    ... ])
    Err(((Number, 10), [Plus, Eof]))
    """
    is_var: Callable[[Any], TypeGuard[A]] = p_instance(start_symbol.__class__)
    is_tok: Callable[[Any], TypeGuard[B]] = p_instance(next(iter(oracle[start_symbol].keys())).__class__)
    def inner(
        stack: Sequence[A | B | Action],
        ast_stack: Sequence[C | D],
        lexemes: Sequence[D],
    ) -> Result[Sequence[C | D], Tuple[D, Collection[B]]]:
        match stack:
            # A [Variable]
            case [top_of_stack, *popped_stack] if is_var(top_of_stack):
                try:
                    expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
                except IndexError:
                    raise Exception('Unexpected end of input. Expected:', _expected(oracle[top_of_stack]))
                match expansions:
                    case []:
                        return Err((lexemes[0], _expected(oracle[top_of_stack])))
                    case [expansion]:
                        return inner((*expansion, *popped_stack), ast_stack, lexemes)
                    case _:
                        raise Exception('Not an LL(1) grammar!!!')
            # B [Token] (match)
            case [top_of_stack, *popped_stack] if is_tok(top_of_stack) and top_of_stack == identify_lexeme(lexemes[0]):
                return inner(popped_stack, (lexemes[0], *ast_stack), lexemes[1:])
            # B [Token] (no match)
            case [top_of_stack, *popped_stack] if is_tok(top_of_stack):
                assert is_tok(top_of_stack)
                return Err((lexemes[0], (top_of_stack,)))
            # Action
            case [f, *popped_stack]:
                assert hasattr(f, '__call__')
                return inner(popped_stack, f(ast_stack), lexemes)
            # Empty stack (finished parsing)
            case []:
                if len(lexemes):
                    return Err((lexemes[0], []))
                else:
                    return Ok(ast_stack)
        raise Exception('Unreachable!')
    return wraps(parser)(p(inner, [start_symbol], []))

if __name__ == '__main__':
    import doctest
    from enum import auto, IntEnum
    from build_oracle import oracle_table
    doctest.testmod()

250 resolution.py
@ -1,250 +0,0 @@
from emis_funky_funktions import *

from itertools import combinations, product
from operator import eq
from typing import Collection, FrozenSet, TypeAlias

from ir import Clause, Clause_, IRNeg, IRProp, IRTerm, IRVar, KnowledgeBase, KnowledgeBase_, Substitutions, sub_all, unify

def terms_cancel(t1: IRTerm, t2: IRTerm) -> Option[Substitutions]:
    """
    Determine if two terms could cancel each other out, given the right substitutions

    That is, is there some set of substitutions such that t1 = ¬t2 or ¬t1 = t2?

    If negation is possible and a substitution exists, that substitution is returned.
    Otherwise, `None` is returned.

    >>> terms_cancel(IRNeg(IRVar('x1')), IRProp('Terezi'))
    Some((Terezi()/x1,))

    >>> terms_cancel(IRProp('Nepeta'), IRVar('x1'))
    Some((¬Nepeta()/x1,))

    >>> terms_cancel(IRProp('ancestor', [IRVar('x1')]), IRVar('x1')) is None
    True
    """
    match (t1, t2):
        case (IRNeg(a), b) | (b, IRNeg(a)): #type: ignore
            return hush(unify(a, b))
        case (IRVar(_) as x, a) | (a, IRVar(_) as x): #type: ignore
            return hush(unify(x, IRNeg(a)))
    return None

def merge_clauses(c1: Clause_, c2: Clause_) -> KnowledgeBase:
    """
    Produce a list of all possible clauses which resolution could derive from c1 and c2

    For each term in c1 that could cancel with a term in c2 using a substitution, a
    possible clause is produced equal to the union of c1 and c2 with the canceled
    terms removed and the substitution applied.

    >>> merge_clauses(
    ...     [ IRProp('day'), IRNeg(IRProp('night')) ],
    ...     [ IRProp('night') ]
    ... )
    { { day() } }

    >>> merge_clauses(
    ...     [ IRNeg(IRProp('transgender', [IRVar('x1')])), IRProp('powerful', [IRVar('x1')]) ],
    ...     [ IRNeg(IRProp('powerful', [IRVar('x2')])), IRProp('god', [IRVar('x2')]) ]
    ... )
    { { god(*x1), ¬transgender(*x1) } }

    >>> merge_clauses(
    ...     [ IRNeg(IRProp('day')), IRProp('night') ],
    ...     [ IRVar('x2') ]
    ... )
    { { night() }, { ¬day() } }

    If two clauses cannot merge, an empty set is returned

    >>> merge_clauses(
    ...     [ IRProp('day') ],
    ...     [ IRProp('wet') ]
    ... )
    { }
    """
    terms1, terms2 = list(c1), list(c2)
    valid_substitutions = drop_none(
        map_opt(lambda subs: (subs, i1, i2), terms_cancel(t1, t2))
        for ((i1, t1), (i2, t2)) in product(enumerate(terms1), enumerate(terms2))
    )
    return FSet(
        FSet(
            sub_all(subs, term)
            for term in (*terms1[:i1], *terms1[i1 + 1:], *terms2[:i2], *terms2[i2 + 1:])
        )
        for (subs, i1, i2) in valid_substitutions
    )

def derive(clauses: KnowledgeBase_) -> KnowledgeBase:
    """
    All possible clauses which derive in one step of resolution from a knowledge base

    Attempts to merge every possible combination of clauses in the knowledge base.

    >>> derive([
    ...     [IRNeg(IRProp('animal', [IRProp('Kim')]))],
    ...     [IRNeg(IRProp('dog', [IRVar('x0')])), IRProp('animal', [IRVar('x0')])],
    ...     [IRNeg(IRProp('cat', [IRVar('x1')])), IRProp('animal', [IRVar('x1')])],
    ...     [IRProp('dog', [IRProp('Kim')])],
    ... ]) #doctest: +NORMALIZE_WHITESPACE
    {
      { animal(Kim()) },
      { ¬cat(Kim()) },
      { ¬dog(Kim()) }
    }
    """
    return FSet(
        FSet(clause)
        for (c1, c2) in combinations(clauses, 2)
        for clause in merge_clauses(c1, c2)
    )

def derive2(kb1: KnowledgeBase_, kb2: KnowledgeBase_) -> KnowledgeBase:
    """
    All clauses which derive in one step from the combination of two knowledge bases

    Each resulting clause is the combination of one clause from the left knowledge base
    with exactly one clause from the right knowledge base. Clauses from the same
    knowledge base will not be combined.

    >>> derive2([
    ...     [IRNeg(IRProp('animal', [IRProp('Kim')]))],
    ...     [IRNeg(IRProp('cat', [IRVar('x1')])), IRProp('animal', [IRVar('x1')])],
    ... ], [
    ...     [IRNeg(IRProp('dog', [IRVar('x0')])), IRProp('animal', [IRVar('x0')])],
    ...     [IRProp('dog', [IRProp('Kim')])],
    ... ])
    { { ¬dog(Kim()) } }
    """
    return FSet(
        FSet(clause)
        for (c1, c2) in product(kb1, kb2)
        for clause in merge_clauses(c1, c2)
    )

false_clause: Clause = FSet()
"""
The clause which represents the logical condition False

i.e. the empty clause
"""

def next_generation(
    previous_generations: KnowledgeBase,
    current_generation: KnowledgeBase,
) -> KnowledgeBase:
    """
    Advance resolution by one step

    This performs `derive()` on the current generation as well as `derive2()` between the
    current generation and all clauses from previous generations in order to produce a new
    generation, hopefully with new and interesting clauses in it.

    When using the new generation, the previous generations should be taken to be the
    union of all clauses within the two knowledge bases passed to this function; that is,
    all clauses which were used in producing the new generation.

    ### Example

    Starting from an example knowledge base, we produce a new generation. Notice that the
    empty list for previous generations indicates that this is the zeroth (original)
    knowledge base.

    >>> next_generation(fset(
    ... ), fset(
    ...     fset( IRNeg(IRProp('animal', (IRProp('Kim'),))) ),
    ...     fset( IRNeg(IRProp('dog', (IRVar('x0'),))), IRProp('animal', (IRVar('x0'),)) ),
    ...     fset( IRNeg(IRProp('cat', (IRVar('x1'),))), IRProp('animal', (IRVar('x1'),)) ),
    ...     fset( IRProp('dog', (IRProp('Kim'),)) ),
    ... )) #doctest: +NORMALIZE_WHITESPACE
    {
      { animal(Kim()) },
      { ¬cat(Kim()) },
      { ¬dog(Kim()) }
    }

    Now, we perform the next round of resolution. All of the terms originally passed into
    the first generation have been moved to the previous generations' knowledge base, and
    the current generation contains only the results from the first call.

    >>> next_generation(fset(
    ...     fset( IRNeg(IRProp('animal', (IRProp('Kim'),))) ),
    ...     fset( IRNeg(IRProp('dog', (IRVar('x0'),))), IRProp('animal', (IRVar('x0'),)) ),
    ...     fset( IRNeg(IRProp('cat', (IRVar('x1'),))), IRProp('animal', (IRVar('x1'),)) ),
    ...     fset( IRProp('dog', (IRProp('Kim'),)) ),
    ... ), fset(
    ...     fset( IRNeg(IRProp('dog', (IRProp('Kim'),))) ),
    ...     fset( IRNeg(IRProp('cat', (IRProp('Kim'),))) ),
    ...     fset( IRProp('animal', (IRProp('Kim'),)) ),
    ... ))
    { { } }

    The return of a knowledge base containing false (i.e. `[]`) indicates that resolution
    has found a contradiction. In this case, there are two: One from ¬dog(Kim) and
    dog(Kim), and one from animal(Kim) and ¬animal(Kim).

    Excluded from the next generation is any term which exists in a previous generation.
    For example:

    >>> next_generation(fset(
    ... ), fset(
    ...     fset( IRNeg(IRProp('day')), IRProp('day') ),
    ...     fset( IRProp('day') ),
    ... ))
    { }

    This configuration could theoretically derive day() again; however, it does not,
    because day() is already known, being present in the current generation.
    """
    return FSet(
        (derive(current_generation) | derive2(current_generation, previous_generations))
        - current_generation
        - previous_generations
    )

def derives_false(
    knowledge_base: KnowledgeBase,
    previous_generations: KnowledgeBase = FSet(),
) -> bool:
    """
    Determine if it is possible to derive false from a given knowledge base.

    Uses any number of generations of resolution via `next_generation()` to find a
    generation which contains the `false_clause`, indicating that the original knowledge
    base (and all subsequent generations) are contradictory.

    `previous_generations` may be set if the current knowledge base was derived from some
    other series of clauses using `derive()`.

    >>> derives_false(fset(
    ...     fset( IRNeg(IRProp('animal', (IRProp('Kim'),))) ),
    ...     fset( IRNeg(IRProp('dog', (IRVar('x0'),))), IRProp('animal', (IRVar('x0'),)) ),
    ...     fset( IRNeg(IRProp('cat', (IRVar('x1'),))), IRProp('animal', (IRVar('x1'),)) ),
    ...     fset( IRProp('dog', (IRProp('Kim'),)) ),
    ... ))
    True

    >>> derives_false(fset(
    ...     fset( IRNeg(IRProp('animal', (IRProp('Kim'),))) ),
    ...     fset( IRNeg(IRProp('dog', (IRVar('x0'),))), IRProp('animal', (IRVar('x0'),)) ),
    ...     fset( IRNeg(IRProp('cat', (IRVar('x1'),))), IRProp('animal', (IRVar('x1'),)) ),
    ... ))
    False
    """
    if false_clause in knowledge_base:
        return True
    elif len(knowledge_base) == 0:
        return False
    else:
        return derives_false(
            next_generation(previous_generations, knowledge_base),
            knowledge_base | previous_generations
        )

if __name__ == '__main__':
    import doctest
    doctest.testmod()

45 tokens.py
@ -1,45 +0,0 @@
from enum import auto, IntEnum
from typing import Collection, Tuple
from re import compile, Pattern

class Tok(IntEnum):
    """
    All possible tokens used in the grammar
    """
    Newline = auto()
    Whitespace = auto()
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()
    Identifier = auto()
    Eof = auto()

    def __repr__(self):
        return self._name_

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(r"\n"), Tok.Newline),
    (compile(r"[ \t]+"), Tok.Whitespace),
    (compile("Predicates:"), Tok.PredicateSection),
    (compile("Variables:"), Tok.VariablesSection),
    (compile("Constants:"), Tok.ConstantsSection),
    (compile("Functions:"), Tok.FunctionsSection),
    (compile("Clauses:"), Tok.ClausesSection),
    (compile("!"), Tok.Negate),
    (compile(r"\("), Tok.OpenP),
    (compile(r"\)"), Tok.CloseP),
    (compile(","), Tok.Comma),
    (compile(r"\w+"), Tok.Identifier),
]
"""
A mapping of regexs to the tokens they identify

Tokens earlier on in the list should be regarded as higher priority, even if a match lower
on the list also matches. All unicode strings should be matched by at least one token.
"""