From 206c0a0ec0c955b15cb8595897bfe26f6ed18589 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Tue, 7 Mar 2023 20:42:10 -0500 Subject: [PATCH] Start making a grammar then realize there's a builtin json parser --- build_oracle.py | 203 ------------------------------ grammar.py | 323 +++++------------------------------------------- ir.py | 281 ----------------------------------------- lab2.py | 36 ------ parse.py | 141 --------------------- resolution.py | 250 ------------------------------------- tokens.py | 45 ------- 7 files changed, 31 insertions(+), 1248 deletions(-) delete mode 100644 build_oracle.py delete mode 100644 ir.py delete mode 100644 lab2.py delete mode 100644 parse.py delete mode 100644 resolution.py delete mode 100644 tokens.py diff --git a/build_oracle.py b/build_oracle.py deleted file mode 100644 index a33590a..0000000 --- a/build_oracle.py +++ /dev/null @@ -1,203 +0,0 @@ -""" -Tools for building an oracle table - -See `grammar` and `build_oracle.sh` for scripts which actually produce python code. This -module only produces an oracle table in python, without outputting it. - -NOTE! Doctests in this module use `GRAMMAR` from `grammar.py` and `EGRAMMAR` as a version -of that grammar with the actions erased, with `_erase_actions()`. -""" -from emis_funky_funktions import * - -from enum import auto, Enum, IntEnum -from functools import cache, reduce -from operator import getitem -from typing import Any, cast, Collection, Mapping, Sequence, Set, Tuple, TypeGuard, TypeVar - -def _erase_actions_h( - handle: Sequence[A | B | C], - is_not_c: Callable[[A | B | C], TypeGuard[A | B]] -) -> Sequence[A | B]: - """ - Produce an identical handle, but with all the actions removed - """ - return [i for i in handle if is_not_c(i)] - -def _erase_actions( - grammar: Sequence[Tuple[A, Sequence[A | B | C]]], - is_not_c: Callable[[A | B | C], TypeGuard[A | B]] -) -> Sequence[Tuple[A, Sequence[A | B]]]: - """ - Produce an identical grammar, but with all the actions removed - """ - return [ - (var, _erase_actions_h(handle, is_not_c)) - for (var, handle) in grammar - ] - -def _first( - is_term: Callable[[A | B], TypeGuard[B]], - grammar: Sequence[Tuple[A, Sequence[A | B]]], - sequence: Sequence[A | B] -) -> Tuple[Collection[B], bool]: - """ - Computes all of the possible starting terminals for a handle in a given grammar - - Due to pathetic python weaknesses, the first argument you must provide is a type guard - to determine whether a certain thing is a terminal as opposed to a variable. - - Then, pass in the grammar and the sequence of terminals and variables in question. - - The output contains two values. The first is a set of possible terminals, and the - second is a boolean indicating whether this term can derive epsilon. 
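    A miniature worked case may help before the full `EGRAMMAR` doctests below
    (a sketch, not part of the original module; any types work for variables and
    terminals so long as the type guard separates them):

        A = 0                                   # one variable, two productions
        toy_grammar = [(A, ['a']), (A, [])]     # A := 'a'  |  ε
        is_str = lambda x: isinstance(x, str)   # terminals are the strings
        firsts, eps = _first(is_str, toy_grammar, [A, 'b'])
        assert firsts == {'a', 'b'} and eps is False
        # 'a' can begin the sequence via A; 'b' can begin it when A derives ε;
        # the sequence itself can never derive ε, because 'b' remains.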
-
-    >>> _first(flip(cur2(isinstance))(Tok), EGRAMMAR, [Variable.Clause])
-    ({Negate, Identifier}, False)
-
-    >>> _first(flip(cur2(isinstance))(Tok), EGRAMMAR, [Variable.CSTerms])
-    ({Comma}, True)
-
-    >>> _first(flip(cur2(isinstance))(Tok), EGRAMMAR, [Variable.CSTerms, Tok.CloseP])
-    ({CloseP, Comma}, False)
-    """
-    def inner(vs: Sequence[A | B]) -> Tuple[Set[B], bool]:
-        match vs:
-            case []:
-                return (set(), True)
-            case [v, *rest] if is_term(v):
-                return ({v}, False)
-            case [v, *rest]:
-                this_variable_first, derives_epsilon = reduce(
-                    lambda acc, result: (acc[0] | result[0], acc[1] or result[1]),
-                    [
-                        inner(handle)
-                        for (other_variable, handle) in grammar
-                        if other_variable == v
-                    ]
-                )
-                if derives_epsilon:
-                    rest_first, rest_derives_epsilon = inner(rest)
-                    return (rest_first | this_variable_first, rest_derives_epsilon)
-                else:
-                    return (this_variable_first, False)
-        raise Exception("UNREACHABLE")
-    return inner(sequence)
-
-def _follow(
-    is_term: Callable[[A | B], TypeGuard[B]],
-    grammar: Sequence[Tuple[A, Sequence[A | B]]],
-) -> Mapping[A, Collection[B]]:
-    """
-    Produce a table indicating exactly which terminals can follow each variable
-
-    >>> _follow(flip(cur2(isinstance))(Tok), EGRAMMAR) #doctest: +NORMALIZE_WHITESPACE
-    {<Start>: set(),
-     <Idents>: {Newline},
-     <Clauses>: {Eof},
-     <Clauses_>: {Eof},
-     <Clause>: {Newline, Eof},
-     <Clause_>: {Newline, Eof},
-     <Term>: {Newline, Negate, CloseP, Comma, Identifier, Eof},
-     <Func>: {Newline, Negate, CloseP, Comma, Identifier, Eof},
-     <CSTerms>: {CloseP}}
-    """
-    follow_table: Mapping[A, Set[B]] = {
-        variable: set()
-        for (variable, _) in grammar
-    }
-    def following_tokens(handle: Sequence[A | B], follows_handle: Set[B]) -> Set[B]:
-        handle_first, handle_derives_epsilon = _first(is_term, grammar, handle)
-        return set(handle_first) | (follows_handle if handle_derives_epsilon else set())
-
-    def inner(prev_table: Mapping[A, Set[B]]) -> Mapping[A, Set[B]]:
-        new_table = reduce(
-            lambda acc, entry: acc | {entry[0]: acc[entry[0]] | entry[1]},
-            [
-                (
-                    cast(A, handle[i]),
-                    following_tokens(handle[i+1:], prev_table[variable])
-                )
-                for (variable, handle) in grammar
-                for i in range(len(handle))
-                if not is_term(handle[i])
-            ],
-            prev_table
-        )
-        if new_table == prev_table:
-            return new_table
-        else:
-            return inner(new_table)
-    return inner(follow_table)
-
-def _predict(
-    is_term: Callable[[A | B], TypeGuard[B]],
-    grammar: Sequence[Tuple[A, Sequence[A | B]]],
-    follow: Mapping[A, Collection[B]],
-    lhs: A,
-    rhs: Sequence[A | B]
-) -> Collection[B]:
-    """
-    Given a production, identify the terminals under which this production would be valid
-
-    >>> is_tok = flip(cur2(isinstance))(Tok)
-    >>> follow = _follow(is_tok, EGRAMMAR)
-    >>> _predict(is_tok, EGRAMMAR, follow, Variable.Clause, [Variable.Term, Variable.Clause_])
-    {Negate, Identifier}
-    """
-    first_rhs, epsilon_rhs = _first(is_term, grammar, rhs)
-    if epsilon_rhs:
-        return set(follow[lhs]) | set(first_rhs)
-    else:
-        return first_rhs
-
-def oracle_table(
-    is_term: Callable[[A | B], TypeGuard[B]],
-    is_var:  Callable[[A | B], TypeGuard[A]],
-    grammar: Sequence[Tuple[A, Sequence[A | B]]],
-) -> Mapping[A, Mapping[B, Collection[Sequence[A | B]]]]:
-    """
-    A variant of `_oracle` that generates a table immediately rather than lazily
-
-    No significant performance benefit
-
-    >>> is_tok = p_instance(Tok)
-    >>> is_var = p_instance(Variable)
-    >>> my_oracle_table = oracle_table(is_tok, is_var, EGRAMMAR)
-
-    One valid expansion:
-    >>> my_oracle_table[Variable.Clauses_][Tok.Negate]
-    [[<Clause>, <Clauses>]]
-
-    One valid expansion, but it expands to epsilon:
-    >>> 
my_oracle_table[Variable.Clauses_][Tok.Eof] - [[]] - - Zero valid expansions: - >>> my_oracle_table[Variable.Term][Tok.Newline] - [] - """ - all_variables = { lhs for (lhs, rhs) in grammar } - all_terminals = { symbol for (lhs, rhs) in grammar for symbol in rhs if is_term(symbol) } - - is_not_c: Callable[[A | B | C], TypeGuard[A | B]] = lambda x: is_term(x) or is_var(x) #type:ignore - e_grammar: Sequence[Tuple[A, Sequence[A | B]]] = _erase_actions(grammar, is_not_c) #type:ignore - follow = _follow(is_term, e_grammar) - - return { - v: { - t: [ - handle - for (lhs, handle) in grammar - if lhs == v - and t in _predict(is_term, e_grammar, follow, lhs, _erase_actions_h(handle, is_not_c)) #type:ignore - ] - for t in all_terminals - } - for v in all_variables - } - -if __name__ == '__main__': - import doctest - from grammar import GRAMMAR, Tok, Variable - EGRAMMAR = _erase_actions(GRAMMAR, lambda x: not hasattr(x, '__call__')) #type: ignore - doctest.testmod() \ No newline at end of file diff --git a/grammar.py b/grammar.py index afe99bb..e3128f7 100644 --- a/grammar.py +++ b/grammar.py @@ -1,300 +1,39 @@ -""" -A grammar for parsing CNF files - -If this module is run directly in python, it will spit out valid python which produces an -oracle table for the grammar it defines. It's recommended that this be done using -`build_oracle.sh` instead, however, which will build a whole python module containing the -oracle table, complete with imports. -""" -from emis_funky_funktions import * - -from dataclasses import dataclass from enum import auto, IntEnum +from typing import Collection, Tuple from re import compile, Pattern -from build_oracle import oracle_table -from ir import IRNeg, IRProp, IRTerm, IRVar, KnowledgeBase -from lex import Lexeme, tokenize -from parse import Action, parser -from tokens import * +class Tok(IntEnum): + """ + All possible tokens used in the grammar + """ + Whitespace = auto() + OpenCurly = auto() + CloseCurly = auto() + OpenSquare = auto() + CloseSquare = auto() + Comma = auto() + Colon = auto() + String = auto() + Number = auto() + Eof = auto() -from typing import Any, Callable, cast, Collection, Mapping, Sequence, Tuple, TypeAlias + def __repr__(self): + return self._name_ - -class Variable(IntEnum): - Start = auto() - Idents = auto() - Clauses = auto() - Clauses_ = auto() - Clause = auto() - Clause_ = auto() - Term = auto() - Func = auto() - CSTerms = auto() - - def __repr__(self) -> str: - return f'<{self._name_}>' - -ASTTerm: TypeAlias = 'ASTNegated | ASTProp' - -class IdentKind(IntEnum): - Function = auto() - Constant = auto() - Variable = auto() - Predicate = auto() - -@dataclass(frozen=True) -class CallingNonFunc: - term: Lexeme[Tok] - obj_type: IdentKind - def __str__(self): - return f'Semantic error: Attempted to call {repr(self.term.matched_string)} (a {self.obj_type.name.lower()}) with arguments on line {self.term.line}:{self.term.col_start}-{self.term.col_end}' - -@dataclass(frozen=True) -class MissingArguments: - term: Lexeme[Tok] - def __str__(self): - return f'Semantic error: The function {repr(self.term.matched_string)} on line {self.term.line}:{self.term.col_start}-{self.term.col_end} is missing arguments!' 
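    A note on the new `Tok` enum added above: the lexer that will consume
    `LEX_TABLE` (defined further down in this diff) is not part of this patch,
    so the following is only a sketch of a first-match-wins scanner in the
    spirit of the old `lex.tokenize`:

        from typing import Iterator

        def scan(src: str) -> Iterator[Tok]:
            """Sketch only; the real lexer lives in lex.py, which is not shown here."""
            pos = 0
            while pos < len(src):
                for pattern, tok in LEX_TABLE:       # earlier entries win, per the docs
                    m = pattern.match(src, pos)
                    if m:
                        if tok != Tok.Whitespace:    # the old pipeline filtered whitespace
                            yield tok
                        pos = m.end()
                        break
                else:
                    raise ValueError(f'no token matches at position {pos}')
            yield Tok.Eof

        # list(scan('{"a": [1]}')) would give:
        #   [OpenCurly, String, Colon, OpenSquare, Number, CloseSquare, CloseCurly, Eof]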
-
-@dataclass(frozen=True)
-class UnidentifiedVariable:
-    term: Lexeme[Tok]
-    def __str__(self):
-        return f'Semantic error: Unidentified identifier {repr(self.term.matched_string)} on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'
-
-@dataclass(frozen=True)
-class PropUsedInObjectContext:
-    term: Lexeme[Tok]
-    def __str__(self):
-        return f'Semantic error: The proposition {repr(self.term.matched_string)} was used in a context where an object was expected on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'
-
-@dataclass(frozen=True)
-class ObjectUsedInPropContext:
-    term: Lexeme[Tok]
-    obj_type: IdentKind
-    def __str__(self):
-        return f'Semantic error: The {self.obj_type.name.lower()} {repr(self.term.matched_string)} was used in a context where a proposition was expected on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'
-
-@dataclass(frozen=True)
-class NegationOfObject:
-    line: int
-    col: int
-    def __str__(self):
-        return f'Semantic error: Attempted to use negation in a context where objects are expected, on line {self.line}:{self.col}'
-
-GenIrError: TypeAlias = CallingNonFunc | MissingArguments | UnidentifiedVariable | PropUsedInObjectContext | ObjectUsedInPropContext | NegationOfObject
-
-@dataclass(frozen=True)
-class IdentBindings:
-    predicate_idents: Sequence[str]
-    variable_idents: Sequence[str]
-    const_idents: Sequence[str]
-    func_idents: Sequence[str]
-
-@dataclass(frozen=True)
-class ASTNegated:
-    neg_lexeme: Lexeme[Tok]
-    term: ASTTerm
-
-    def make_ir(self, idents: IdentBindings, is_prop: bool) -> 'Result[IRTerm, GenIrError]':
-        if is_prop:
-            return map_res(IRNeg, self.term.make_ir(idents, True))
-        else:
-            return Err(NegationOfObject(self.neg_lexeme.line, self.neg_lexeme.col_start))
-
-@dataclass(frozen=True)
-class ASTProp:
-    ident: Lexeme[Tok]
-    arguments: Sequence[ASTTerm]
-
-    def make_ir(self, idents: IdentBindings, is_pred: bool) -> 'Result[IRTerm, GenIrError]':
-        bound_type = (
-            IdentKind.Predicate
-            if self.ident.matched_string in idents.predicate_idents else
-            IdentKind.Variable
-            if self.ident.matched_string in idents.variable_idents else
-            IdentKind.Constant
-            if self.ident.matched_string in idents.const_idents else
-            IdentKind.Function
-            if self.ident.matched_string in idents.func_idents else
-            None
-        )
-        if bound_type is None:
-            return Err(UnidentifiedVariable(self.ident))
-
-        if is_pred:
-            if bound_type != IdentKind.Predicate:
-                return Err(ObjectUsedInPropContext(self.ident, bound_type))
-        else:
-            if bound_type == IdentKind.Function:
-                if not len(self.arguments):
-                    return Err(MissingArguments(self.ident))
-            elif bound_type == IdentKind.Predicate:
-                return Err(PropUsedInObjectContext(self.ident))
-            else:
-                if len(self.arguments):
-                    return Err(CallingNonFunc(self.ident, bound_type))
-
-        if bound_type == IdentKind.Variable:
-            return Ok(IRVar(self.ident.matched_string))
-        else:
-            return (sequence([t.make_ir(idents, False) for t in self.arguments])
-                <= cast(Callable[[Iterable[IRTerm]], Tuple[IRTerm, ...]], tuple))\
-                <= p(IRProp, self.ident.matched_string)
-
-@cur2
-def make_ir(
-    idents: IdentBindings,
-    clauses: Sequence[Sequence[ASTTerm]],
-) -> Result[KnowledgeBase, GenIrError]:
-    return map_res(
-        lambda kb_: FSet(FSet(clause) for clause in kb_),
-        sequence([sequence([term.make_ir(idents, True) for term in clause]) for clause in clauses])
-    )
-
-def cons(stack: Sequence[Any]) -> Sequence[Any]:
-    match stack:
-        case [rest, head, *popped_stack]:
-            return ((head, *rest), *popped_stack)
-        case bad_stack:
-            raise 
Exception("Unexpected stack state!", bad_stack) - -nil: Sequence[Any] = tuple() -@cur2 -def introduce( - cons: Any, - stack: Sequence[Any] -) -> Sequence[Any]: - return (cons, *stack) - -def f_apply(stack: Sequence[Any]) -> Sequence[Any]: - match stack: - case [arg, func, *popped_stack] if hasattr(func, '__call__'): - return (func(arg), *popped_stack) - raise Exception("Unexpected stack state!", stack) -@cur2 -def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]: - match stack: - case [arg, *popped_stack]: - return (func(arg), *popped_stack) - case bad_stack: - raise Exception("Unexpected stack state!", bad_stack) - -def drop(stack: Sequence[Any]) -> Sequence[Any]: - return stack[1:] - -GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [ - (Variable.Start, - [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,IdentBindings)), Tok.Newline, drop - , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop - , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop - , Tok.FunctionsSection, drop, Variable.Idents, f_apply, call_func(make_ir), Tok.Newline, drop - , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ), - - (Variable.Idents, - [ Tok.Identifier, call_func(lambda i: i.matched_string), Variable.Idents, cons ]), - (Variable.Idents, - [ introduce(nil) ]), - - (Variable.Clauses, - [ Tok.Newline, drop, Variable.Clauses_ ]), - (Variable.Clauses, - [ introduce(nil) ]), - - (Variable.Clauses_, - [ Variable.Clause, Variable.Clauses, cons ]), - (Variable.Clauses_, - [ introduce(nil) ]), - - (Variable.Clause, - [ Variable.Term, Variable.Clause_, cons ]), - - (Variable.Clause_, - [ Variable.Clause ]), - (Variable.Clause_, - [ introduce(nil) ]), - - (Variable.Term, - [ Tok.Negate, call_func(cur2(ASTNegated)), Variable.Term, f_apply ]), - (Variable.Term, - [ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]), - - (Variable.Func, - [ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]), - (Variable.Func, - [ introduce(nil) ]), - - (Variable.CSTerms, - [ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]), - (Variable.CSTerms, - [ introduce(nil) ]), +LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [ + (compile(r"[\s\n]+"), Tok.Whitespace), + (compile(r"{"), Tok.OpenCurly), + (compile(r"}"), Tok.CloseCurly), + (compile(r"\["), Tok.OpenSquare), + (compile(r"\]"), Tok.CloseSquare), + (compile(r","), Tok.Comma), + (compile(r":"), Tok.Colon), + (compile(r'"[^"]*"'), Tok.String), + (compile(r'\d+'), Tok.Number), ] """ -Implements the following grammar: +A mapping of regexs to the tokens the identify -Start := PredicateSection Newline - VariablesSection Newline - ConstantsSection Newline - FunctionsSection Newline - ClausesSection Eof - -Idents := Identifier - := ε - -Clauses := Newline - := ε - -Clauses' := - := ε - -Clause := - -Clause' := - := ε - -Term := Negate - := Identifier - -Func? 
-def lex_and_parse(input: str) -> Result[Result[KnowledgeBase, GenIrError], Tuple[Lexeme[Tok], Collection[Tok]]]:
-    lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, input))
-
-    oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
-    parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
-    match parser_(lexemes):
-        case Ok([Ok(ir)]):
-            return Ok(Ok(ir))
-        case Ok([Err(err)]):
-            return Ok(Err(err))
-        case Ok(huh):
-            raise Exception('Unexpected end result: ', huh)
-        case Err(e_tup):
-            return Err(e_tup) #type:ignore
-    raise Exception('Unreachable')
-
-if __name__ == '__main__':
-    # from emis_funky_funktions import cur2, flip
-    # from build_oracle import print_oracle_table_enum, oracle_table
-    # print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
-    import sys
-
-    if len(sys.argv) == 2:
-        with open(sys.argv[1]) as file:
-            match lex_and_parse(file.read()):
-                case Ok(Ok(ir)):
-                    print('\n'.join(' or '.join(str(t) for t in c) for c in ir))
-                case Ok(Err(err)):
-                    print(err)
-                    exit(102)
-                case Err((Lexeme(token, text, line, col_start, col_end), expected)):
-                    print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
-                    exit(101)
-    else:
-        print(f'Usage: python {sys.argv[0]} <input_file>')
-        exit(100)
\ No newline at end of file
+Tokens earlier on in the list should be regarded as higher priority, even if a match lower
+on the list also matches. All unicode strings should be matched by at least one token.
+"""
\ No newline at end of file
diff --git a/ir.py b/ir.py
deleted file mode 100644
index 2842b80..0000000
--- a/ir.py
+++ /dev/null
@@ -1,281 +0,0 @@
-from emis_funky_funktions import *
-
-from dataclasses import dataclass
-from functools import reduce
-from typing import Collection, FrozenSet, Sequence, TypeAlias
-
-@dataclass(frozen=True)
-class Subst:
-    """
-    A substitution which may be performed on a term.
-
-    Only variables may be substituted, but they may be substituted with any term. That
-    is, `Chris/x1` is valid, but `x1/Chris` is not.
-
-    It is strongly recommended to avoid recursive substitutions, that is, substitutions
-    which contain the variable they are replacing
-    """
-    variable: str
-    replacement: 'IRTerm'
-    def __str__(self) -> str:
-        return repr(self)
-    def __repr__(self) -> str:
-        return f'{self.replacement}/{self.variable}'
-
-Substitutions: TypeAlias = Sequence[Subst]
-
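    A quick sketch of `Subst` in use (it leans on `IRVar` and `IRProp`, which
    are defined a little further down in this same file):

        s = Subst('x1', IRProp('Chris'))                      # printed as Chris()/x1
        term = IRProp('likes', (IRVar('x1'), IRProp('Dana')))
        assert term.subst(s) == IRProp('likes', (IRProp('Chris'), IRProp('Dana')))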
-@dataclass(frozen=True)
-class UnificationMismatch:
-    """
-    Indicates that two terms failed to unify
-
-    Contains the two terms, each of which is a valid subterm of one of the two original
-    terms, and which do not superficially unify.
-    """
-    term1: 'IRTerm'
-    term2: 'IRTerm'
-
-@dataclass(frozen=True)
-class LengthMismatch:
-    """
-    Indicates that two clauses/argument lists failed to unify due to a length mismatch
-
-    Contains the first element of the two lists which didn't have a corresponding term on
-    the other side
-    """
-    term: 'IRTerm'
-
-UnificationError = UnificationMismatch | LengthMismatch
-
-@dataclass(frozen=True)
-class IRProp:
-    """
-    Represents a proposition or object for resolution
-
-    Can have any number of arguments, each of which should be an `IRTerm`. Note that no
-    distinction is made between predicates (n-arity, logical statements), functions
-    (positive-arity functions over objects), and constants (stand-ins for objects)
-    """
-
-    name: str
-    """
-    The identifier of this thing, including its location in the source
-    """
-
-    arguments: 'Tuple[IRTerm, ...]' = tuple()
-
-    def subst(self, subst: Subst) -> 'IRTerm':
-        """
-        Perform substitution on a proposition
-
-        Returns the same proposition, but with any instances of the variable named in the
-        substitution replaced with the contents of the substitution.
-
-        >>> original = IRProp('angry', (IRVar('x1'),))
-        >>> original
-        angry(*x1)
-
-        >>> original.subst(Subst('x1', IRProp('Alex')))
-        angry(Alex())
-        """
-        return IRProp(self.name, tuple(arg.subst(subst) for arg in self.arguments))
-    def __str__(self) -> str:
-        return repr(self)
-    def __repr__(self) -> str:
-        return f'{self.name}({",".join(str(arg) for arg in self.arguments)})'
-    def __contains__(self, var: str) -> bool:
-        """
-        Test if a variable with a given name exists in this term
-
-        >>> 'x1' in IRProp('friends', [IRProp('John'), IRProp('mother_of', [IRVar('x1')])])
-        True
-        """
-        return any(var in term for term in self.arguments)
-
-@dataclass(frozen=True)
-class IRVar:
-    """
-    A variable which may be substituted for any other term
-    """
-    name: str
-    def subst(self, subst: Subst) -> 'IRTerm':
-        """
-        Perform substitution on a proposition
-
-        Returns the same proposition, but with any instances of the variable named in the
-        substitution replaced with the contents of the substitution.
-
-        >>> IRVar('x1').subst(Subst('x1', IRProp('Alex')))
-        Alex()
-
-        >>> IRVar('x1').subst(Subst('x2', IRProp('Alex')))
-        *x1
-        """
-        if self.name == subst.variable:
-            return subst.replacement
-        else:
-            return self
-    def __str__(self) -> str:
-        return repr(self)
-    def __repr__(self) -> str:
-        return f'*{self.name}'
-    def __contains__(self, var: str) -> bool:
-        """
-        Test if a variable with a given name exists in this term
-
-        >>> 'x1' in IRVar('x1')
-        True
-
-        >>> 'x1' in IRVar('x2')
-        False
-        """
-        return var == self.name
-
-@dataclass(frozen=True)
-class IRNeg:
-    """
-    A negated proposition
-    """
-    inner: 'IRTerm'
-    def subst(self, subst: Subst) -> 'IRTerm':
-        """
-        Perform substitution on a proposition
-
-        Returns the same proposition, but with any instances of the variable named in the
-        substitution replaced with the contents of the substitution.
-
-        >>> original = IRNeg(IRProp('happy', [IRVar('x1')]))
-        >>> original
-        ¬happy(*x1)
-
-        >>> original.subst(Subst('x1', IRProp('parent', [IRProp('Susie')])))
-        ¬happy(parent(Susie()))
-        """
-        return IRNeg(self.inner.subst(subst))
-    def __str__(self) -> str:
-        return repr(self)
-    def __repr__(self) -> str:
-        return f'¬{self.inner}'
-    def __contains__(self, var: str) -> bool:
-        """
-        Test if a variable with a given name exists in this term
-
-        >>> 'x1' in IRNeg(IRProp('round', [IRVar('x1')]))
-        True
-        """
-        return var in self.inner
-
-IRTerm: TypeAlias = IRVar | IRProp | IRNeg
-Clause: TypeAlias = FrozenSet[IRTerm]
-Clause_: TypeAlias = Collection[IRTerm]
-"""
-A more general definition of `Clause` which uses a collection rather than a frozen set
-
-Every `Clause` is a `Clause_`, but not vice versa. In other words, `Clause` is, in
-effect, a subclass of `Clause_`.
-
-Due to this generalization, `Clause_` does not necessarily benefit from hashability or
-deduplication. It exists mostly to make inputting things easier, e.g. in doctests.
-"""
-
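    The `__contains__` implementations above are the occurs check: `unify()`
    (below) only binds a variable to a term that does not already mention it,
    since such a binding would be exactly the recursive substitution `Subst`'s
    docstring warns against. A brief sketch:

        looping = IRProp('mother_of', (IRVar('x1'),))
        assert 'x1' in looping              # x1 occurs in the candidate replacement
        # so unify(IRVar('x1'), looping) answers Err(...) rather than inventing
        # the non-terminating binding x1 := mother_of(x1)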
-KnowledgeBase: TypeAlias = FrozenSet[Clause]
-KnowledgeBase_: TypeAlias = Collection[Clause_]
-"""
-A more general version of `KnowledgeBase`
-
-`KnowledgeBase_` : `KnowledgeBase` :: `Clause_` : `Clause`
-
-A superclass of `KnowledgeBase`
-"""
-
-sub_all: Callable[[Substitutions, IRTerm], IRTerm] = p(reduce, lambda t, s: t.subst(s)) #type:ignore
-"""
-Perform a series of substitutions on a term
-
-Applies every substitution to the term in order
-
->>> sub_all(
-...     [Subst('x1', IRVar('x2')), Subst('x2', IRProp('Karkat'))],
-...     IRProp('kismesis', [IRVar('x1'), IRVar('x2')]),
-... )
-kismesis(Karkat(), Karkat())
-"""
-
-def unify(t1: IRTerm, t2: IRTerm) -> Result[Substitutions, UnificationError]:
-    """
-    Attempt to find a substitution that unifies two terms
-
-    If successful, the returned substitutions will cause both terms to be equal, when
-    applied to both.
-
-    If this method fails, then the pair of subterms which caused the unification to fail
-    are returned.
-
-    >>> unify(
-    ...     IRProp('imaginary', [IRProp('Rufio')]),
-    ...     IRProp('imaginary', [IRVar('x1')])
-    ... )
-    Ok((Rufio()/x1,))
-
-    >>> unify(
-    ...     IRProp('dating', [IRProp('Jade'), IRVar('x1')]),
-    ...     IRProp('dating', [IRVar('x1'), IRProp('John')])
-    ... )
-    Err(UnificationMismatch(term1=Jade(), term2=John()))
-
-    >>> unify(
-    ...     IRProp('mother_of', [IRVar('x1')]),
-    ...     IRVar('x1')
-    ... )
-    Err(UnificationMismatch(term1=mother_of(*x1), term2=*x1))
-    """
-    match (t1, t2):
-        case (IRVar(v1), IRVar(v2)) if v1 == v2:
-            return Ok(tuple())
-        case (IRVar(v), t_other) | (t_other, IRVar(v)) if v not in t_other: #type: ignore
-            return Ok((Subst(v, t_other),))
-        case (IRProp(n1, a1), IRProp(n2, a2)) if n1 == n2 and len(a1) == len(a2):
-            return unify_lists(a1, a2)
-        case (IRNeg(i1), IRNeg(i2)):
-            return unify(i1, i2)
-    return Err(UnificationMismatch(t1, t2))
-
-def unify_lists(c1: Sequence[IRTerm], c2: Sequence[IRTerm]) -> Result[Substitutions, UnificationError]:
-    """
-    Attempt to perform unification on two term/argument lists
-
-    See `unify()` for the details of how this works. When working with lists, the same
-    rules apply. The substitutions, when applied to every term of both lists, will
-    cause the lists to become exactly the same.
-
-    Lists which are not the same length cannot be unified, and will always fail.
-
-    Notice the difference between a list of `IRTerm`s and a `Clause`: Namely, that a
-    `Clause` is unordered, while a list of `IRTerm`s is ordered.
-
-    >>> unify_lists(
-    ...     [ IRProp('imaginary', [IRProp('Rufio')]), IRProp('friend', [IRVar('x1'), IRVar('x3')]) ],
-    ...     [ IRProp('imaginary', [IRVar('x1')]), IRProp('friend', [IRVar('x2'), IRProp('Tavros')]) ]
-    ... )
-    Ok((Rufio()/x1, Rufio()/x2, Tavros()/x3))
-
-    >>> unify_lists(
-    ...     [ IRProp('imaginary', [IRProp('Rufio')]), IRProp('friend', [IRVar('x1'), IRVar('x3')]) ],
-    ...     [ IRProp('imaginary', [IRVar('x1')]) ]
-    ... )
-    Err(LengthMismatch(term=friend(Rufio(),*x3)))
-    """
-    match (c1, c2):
-        case ([], []):
-            return Ok(tuple())
-        case ([h1, *t1], [h2, *t2]):
-            return unify(h1, h2) << (lambda subs:
-                unify_lists((*map(p(sub_all,subs),t1),), (*map(p(sub_all,subs),t2),)) <= (
-                    lambda final_subs: (*subs, *final_subs)))
-        case ([h, *t], []) | ([], [h, *t]):
-            return Err(LengthMismatch(h))
-    raise Exception('Unreachable')
-
-if __name__ == '__main__':
-    import doctest
-    doctest.testmod()
\ No newline at end of file
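    For callers, the `Result` returned by `unify` pattern-matches the same way
    `lab2.py` (next in this patch) matches `lex_and_parse`; a minimal sketch:

        match unify(IRProp('eq', (IRVar('x1'),)), IRProp('eq', (IRProp('Zero'),))):
            case Ok(subs):
                print(sub_all(subs, IRVar('x1')))    # prints: Zero()
            case Err(err):
                print(f'cannot unify: {err}')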
diff --git a/lab2.py b/lab2.py
deleted file mode 100644
index f52952d..0000000
--- a/lab2.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from emis_funky_funktions import *
-
-from typing import Sequence
-
-from grammar import lex_and_parse
-from lex import Lexeme
-from resolution import derives_false
-
-def main(args: Sequence[str]) -> Tuple[str, int]:
-    match args[1:]:
-        case [file]:
-            with open(file) as input:
-                match lex_and_parse(input.read()):
-                    case Ok(Ok(ir_kb)):
-                        return (
-                            'no'
-                            if derives_false(ir_kb) else
-                            'yes',
-                            0
-                        )
-                    case Ok(Err(gen_ir_err)):
-                        return (str(gen_ir_err), 102)
-                    case Err((Lexeme(token, text, line, col_start, col_end), expected)):
-                        return (
-                            f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}',
-                            101
-                        )
-        case _:
-            return (f'Usage: python {args[0]} some_file.cnf', 100)
-    raise Exception('Unreachable')
-
-if __name__ == '__main__':
-    from sys import argv
-    out, code = main(argv)
-    print(out)
-    exit(code)
\ No newline at end of file
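    Tracing the exit-code contract of `main` above on a hypothetical input in
    the five-section format that `tokens.py` (at the end of this patch) lexes;
    the file name and contents here are illustrative only:

        # tiny.cnf (hypothetical):
        #   Predicates: p
        #   Variables: x
        #   Constants: c
        #   Functions: f
        #   Clauses:
        #   p(c)
        #   !p(c)
        #
        # main(['lab2.py', 'tiny.cnf']) then returns ('no', 0): the unit clauses
        # p(c) and !p(c) resolve to the empty clause, so the set derives false.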
diff --git a/parse.py b/parse.py
deleted file mode 100644
index c2f273a..0000000
--- a/parse.py
+++ /dev/null
@@ -1,141 +0,0 @@
-from emis_funky_funktions import *
-
-from dataclasses import dataclass
-from functools import wraps
-from operator import contains
-from typing import Callable, Collection, Mapping, TypeGuard, TypeAlias
-
-def _expected(row: Mapping[B, Collection[Sequence[Any]]]) -> Collection[B]:
-    """
-    Given a single row from an oracle table, identify the expected terminals
-    """
-    return [
-        terminal
-        for terminal, expansions in row.items()
-        if len(expansions)
-    ]
-
-Action: TypeAlias = Callable[[Sequence[C | D]], Sequence[C | D]]
-def parser(
-    oracle: Mapping[A, Mapping[B, Collection[Sequence[A | B | Action]]]],
-    identify_lexeme: Callable[[D], B],
-    start_symbol: A,
-) -> Callable[[Sequence[D]], Result[Sequence[C | D], Tuple[D, Collection[B]]]]:
-    """
-    Produces a parser based on a grammar, an oracle, and a start symbol.
-
-    The `identify_lexeme` argument should be a function which converts a lexeme into the
-    token that it represents. This allows for the actual lexemes that are being fed in to
-    be more complex, and store additional data.
-
-    The oracle table may include "action" annotations in its sequences. Actions should be
-    an instance of `Action`, and should work on the AST stack. Every matched lexeme is
-    pushed to the AST stack. An action may transform this stack by popping some number of
-    items off of it, constructing some AST, pushing that AST back to the stack, and then
-    returning the modified stack.
-
-    A couple things to note about this process:
-    - The stack that is passed to each action is immutable. "Modifications" should be
-      made by simply constructing and returning a new stack.
-    - The bottom of the stack is the zero index.
-
-    If a parse is successful, the return value will be the AST stack at the end of the
-    parse. It is up to the caller to verify that this is an expected result.
-
-    If a parse fails, the return value will be a tuple containing the erroneous lexeme and
-    a collection of expected tokens which failed to match it.
-
-    ### Example:
-
-    We generate a simple grammar:
-
-    >>> class SimpleVariable(IntEnum):
-    ...     S = auto()
-    ...     Sum = auto()
-    ...     Sum_ = auto()
-    >>> class SimpleTerminal(IntEnum):
-    ...     Number = auto()
-    ...     Plus = auto()
-    ...     Eof = auto()
-    ...     def __repr__(self):
-    ...         return self.name
-    >>> build_S = lambda x: x[1:]
-    >>> build_Sum = lambda x: (x[0](x[1][1]), *x[2:])
-    >>> build_Sum_1 = lambda x: (lambda y: x[0] + y, *x[2:])
-    >>> build_Sum_2 = lambda x: (lambda y: y, *x)
-    >>> grammar = [
-    ...     (SimpleVariable.S, [SimpleVariable.Sum, SimpleTerminal.Eof, build_S]),
-    ...     (SimpleVariable.Sum, [SimpleTerminal.Number, SimpleVariable.Sum_, build_Sum]),
-    ...     (SimpleVariable.Sum_, [SimpleTerminal.Plus, SimpleVariable.Sum, build_Sum_1]),
-    ...     (SimpleVariable.Sum_, [build_Sum_2]),
-    ... ]
-    >>> is_term = p_instance(SimpleTerminal)
-    >>> is_var = p_instance(SimpleVariable)
-    >>> my_oracle_table = oracle_table(is_term, is_var, grammar)
-    >>> my_parser = parser(my_oracle_table, lambda x: x[0], SimpleVariable.S)
-
-    >>> my_parser([
-    ...     (SimpleTerminal.Number, 1),
-    ...     (SimpleTerminal.Plus,),
-    ...     (SimpleTerminal.Number, 3),
-    ...     (SimpleTerminal.Plus,),
-    ...     (SimpleTerminal.Number, 10),
-    ...     (SimpleTerminal.Eof,),
-    ... ])
-    Ok((14,))
-
-    >>> my_parser([
-    ...     (SimpleTerminal.Number, 1),
-    ...     (SimpleTerminal.Plus,),
-    ...     (SimpleTerminal.Number, 3),
-    ...     (SimpleTerminal.Number, 10),  # <--- this is invalid!
-    ...     (SimpleTerminal.Eof,),
-    ... ])
-    Err(((Number, 10), [Plus, Eof]))
-    """
-    is_var: Callable[[Any], TypeGuard[A]] = p_instance(start_symbol.__class__)
-    is_tok: Callable[[Any], TypeGuard[B]] = p_instance(next(iter(oracle[start_symbol].keys())).__class__)
-    def inner(
-        stack: Sequence[A | B | Action],
-        ast_stack: Sequence[C | D],
-        lexemes: Sequence[D],
-    ) -> Result[Sequence[C | D], Tuple[D, Collection[B]]]:
-        match stack:
-            # A [Variable]
-            case [top_of_stack, *popped_stack] if is_var(top_of_stack):
-                try:
-                    expansions = oracle[top_of_stack][identify_lexeme(lexemes[0])]
-                except IndexError:
-                    raise Exception('Unexpected end of input. 
Expected:', _expected(oracle[top_of_stack])) - match expansions: - case []: - return Err((lexemes[0], _expected(oracle[top_of_stack]))) - case [expansion]: - return inner((*expansion, *popped_stack), ast_stack, lexemes) - case _: - raise Exception('Not an LL(1) grammar!!!') - # B [Token] (match) - case [top_of_stack, *popped_stack] if is_tok(top_of_stack) and top_of_stack == identify_lexeme(lexemes[0]): - return inner(popped_stack, (lexemes[0], *ast_stack), lexemes[1:]) - # B [Token] (no match) - case [top_of_stack, *popped_stack] if is_tok(top_of_stack): - assert is_tok(top_of_stack) - return Err((lexemes[0], (top_of_stack,))) - # Action - case [f, *popped_stack]: - assert hasattr(f, '__call__') - return inner(popped_stack, f(ast_stack), lexemes) - # Empty stack (finished parsing) - case []: - if len(lexemes): - return Err((lexemes[0], [])) - else: - return Ok(ast_stack) - raise Exception('Unreachable!') - return wraps(parser)(p(inner, [start_symbol], [])) - -if __name__ == '__main__': - import doctest - from enum import auto, IntEnum - from build_oracle import oracle_table - doctest.testmod() \ No newline at end of file diff --git a/resolution.py b/resolution.py deleted file mode 100644 index 48ffc2f..0000000 --- a/resolution.py +++ /dev/null @@ -1,250 +0,0 @@ -from emis_funky_funktions import * - -from itertools import combinations, product -from operator import eq -from typing import Collection, FrozenSet, TypeAlias - -from ir import Clause, Clause_, IRNeg, IRProp, IRTerm, IRVar, KnowledgeBase, KnowledgeBase_, Substitutions, sub_all, unify - -def terms_cancel(t1: IRTerm, t2: IRTerm) -> Option[Substitutions]: - """ - Determine if two terms could cancel each other out, given the right substitutions - - That is, is there some set of substitutions such that t1 = ¬t2 or ¬t1 = t2? - - If negation is possible and a substitution exists, that substitution is returned. - Otherwise, `None` is returned. - - >>> terms_cancel(IRNeg(IRVar('x1')), IRProp('Terezi')) - Some((Terezi()/x1,)) - - >>> terms_cancel(IRProp('Nepeta'), IRVar('x1')) - Some((¬Nepeta()/x1,)) - - >>> terms_cancel(IRProp('ancestor', [IRVar('x1')]), IRVar('x1')) is None - True - """ - match (t1, t2): - case (IRNeg(a), b) | (b, IRNeg(a)): #type: ignore - return hush(unify(a, b)) - case (IRVar(_) as x, a) | (a, IRVar(_) as x): #type: ignore - return hush(unify(x, IRNeg(a))) - return None - -def merge_clauses(c1: Clause_, c2: Clause_) -> KnowledgeBase: - """ - Produce a list of all possible clauses which resolution could derive from c1 and c2 - - For each term in c1 that could cancel with a term in c2 using a substitution, a - possible clause is produced equal to the union of c1 and c2 with the canceled - terms removed and the substitution applied. - - >>> merge_clauses( - ... [ IRProp('day'), IRNeg(IRProp('night')) ], - ... [ IRProp('night') ] - ... ) - { { day() } } - - >>> merge_clauses( - ... [ IRNeg(IRProp('transgender', [IRVar('x1')])), IRProp('powerful', [IRVar('x1')]) ], - ... [ IRNeg(IRProp('powerful', [IRVar('x2')])), IRProp('god', [IRVar('x2')]) ] - ... ) - { { god(*x1), ¬transgender(*x1) } } - - >>> merge_clauses( - ... [ IRNeg(IRProp('day')), IRProp('night') ], - ... [ IRVar('x2') ] - ... ) - { { night() }, { ¬day() } } - - If two clauses cannot merge, an empty set is returned - - >>> merge_clauses( - ... [ IRProp('day') ], - ... [ IRProp('wet') ] - ... 
)
-    { }
-    """
-    terms1, terms2 = list(c1), list(c2)
-    valid_substitutions = drop_none(
-        map_opt(lambda subs: (subs, i1, i2), terms_cancel(t1, t2))
-        for ((i1, t1), (i2, t2)) in product(enumerate(terms1), enumerate(terms2))
-    )
-    return FSet(
-        FSet(
-            sub_all(subs, term)
-            for term in (*terms1[:i1], *terms1[i1 + 1:], *terms2[:i2], *terms2[i2 + 1:])
-        )
-        for (subs, i1, i2) in valid_substitutions
-    )
-
-def derive(clauses: KnowledgeBase_) -> KnowledgeBase:
-    """
-    All possible clauses which derive in one step of resolution from a knowledge base
-
-    Attempts to merge every possible combination of clauses in the knowledge base.
-
-    >>> derive([
-    ...     [IRNeg(IRProp('animal', [IRProp('Kim')]))],
-    ...     [IRNeg(IRProp('dog', [IRVar('x0')])), IRProp('animal', [IRVar('x0')])],
-    ...     [IRNeg(IRProp('cat', [IRVar('x1')])), IRProp('animal', [IRVar('x1')])],
-    ...     [IRProp('dog', [IRProp('Kim')])],
-    ... ]) #doctest: +NORMALIZE_WHITESPACE
-    {
-      { animal(Kim()) },
-      { ¬cat(Kim()) },
-      { ¬dog(Kim()) }
-    }
-    """
-    return FSet(
-        FSet(clause)
-        for (c1, c2) in combinations(clauses, 2)
-        for clause in merge_clauses(c1, c2)
-    )
-
-def derive2(kb1: KnowledgeBase_, kb2: KnowledgeBase_) -> KnowledgeBase:
-    """
-    All clauses which derive in one step from the combination of two knowledge bases
-
-    Each resulting clause is the combination of one clause from the left knowledge base
-    with exactly one clause from the right knowledge base. Clauses from the same
-    knowledge base will not be combined.
-
-    >>> derive2([
-    ...     [IRNeg(IRProp('animal', [IRProp('Kim')]))],
-    ...     [IRNeg(IRProp('cat', [IRVar('x1')])), IRProp('animal', [IRVar('x1')])],
-    ... ], [
-    ...     [IRNeg(IRProp('dog', [IRVar('x0')])), IRProp('animal', [IRVar('x0')])],
-    ...     [IRProp('dog', [IRProp('Kim')])],
-    ... ])
-    { { ¬dog(Kim()) } }
-    """
    return FSet(
-        FSet(clause)
-        for (c1, c2) in product(kb1, kb2)
-        for clause in merge_clauses(c1, c2)
-    )
-
-false_clause: Clause = FSet()
-"""
-The clause which represents the logical condition False
-
-i.e. the empty clause
-"""
-
-
-def next_generation(
-    previous_generations: KnowledgeBase,
-    current_generation: KnowledgeBase,
-) -> KnowledgeBase:
-    """
-    Advance resolution by one step
-
-    This performs `derive()` on the current generation as well as `derive2()` between the
-    current generation and all clauses from previous generations in order to produce a new
-    generation, hopefully with new and interesting clauses in it.
-
-    When using the new generation, previous generations should be considered to be the sum
-    of all clauses within the two knowledge bases passed to this function. That is, all
-    clauses which were used in generating that generation.
-
-    ### Example
-
-    Starting from an example knowledge base, we produce a new generation. Notice that the
-    empty list for previous generations indicates that this is the zeroth (original)
-    knowledge base.
-
-    >>> next_generation(fset(
-    ...     ), fset(
-    ...     fset( IRNeg(IRProp('animal', (IRProp('Kim'),))) ),
-    ...     fset( IRNeg(IRProp('dog', (IRVar('x0'),))), IRProp('animal', (IRVar('x0'),)) ),
-    ...     fset( IRNeg(IRProp('cat', (IRVar('x1'),))), IRProp('animal', (IRVar('x1'),)) ),
-    ...     fset( IRProp('dog', (IRProp('Kim'),)) ),
-    ... )) #doctest: +NORMALIZE_WHITESPACE
-    {
-      { animal(Kim()) },
-      { ¬cat(Kim()) },
-      { ¬dog(Kim()) }
-    }
-
-    Now, we perform the next round of resolution. 
All of the terms originally passed into
-    the first generation have been moved to the previous generations' knowledge base, and
-    the current generation contains only the results from the first call.
-
-    >>> next_generation(fset(
-    ...     fset( IRNeg(IRProp('animal', (IRProp('Kim'),))) ),
-    ...     fset( IRNeg(IRProp('dog', (IRVar('x0'),))), IRProp('animal', (IRVar('x0'),)) ),
-    ...     fset( IRNeg(IRProp('cat', (IRVar('x1'),))), IRProp('animal', (IRVar('x1'),)) ),
-    ...     fset( IRProp('dog', (IRProp('Kim'),)) ),
-    ... ), fset(
-    ...     fset( IRNeg(IRProp('dog', (IRProp('Kim'),))) ),
-    ...     fset( IRNeg(IRProp('cat', (IRProp('Kim'),))) ),
-    ...     fset( IRProp('animal', (IRProp('Kim'),)) ),
-    ... ))
-    { { } }
-
-    The return of a knowledge base containing false (ie `[]`) indicates that resolution
-    has found a contradiction. In this case, there are two: One from ¬dog(Kim) and
-    dog(Kim), and one from animal(Kim) and ¬animal(Kim).
-
-    Excluded from the next generation is any term which exists in a previous generation.
-    For example:
-
-    >>> next_generation(fset(
-    ...     ), fset(
-    ...     fset( IRNeg(IRProp('day')), IRProp('day') ),
-    ...     fset( IRProp('day') ),
-    ... ))
-    { }
-
-    This configuration could theoretically derive day() again; however, it does not,
-    because that is already known, and is present in the current generation.
-    """
-    return FSet(
-        (derive(current_generation) | derive2(current_generation, previous_generations))
-        - current_generation
-        - previous_generations
-    )
-def derives_false(
-    knowledge_base: KnowledgeBase,
-    previous_generations: KnowledgeBase = FSet(),
-) -> bool:
-    """
-    Determine if it is possible to derive false from a given knowledge base.
-
-    Uses any number of generations of resolution via `next_generation()` to find a
-    generation which contains the `false_clause`, indicating that the original knowledge
-    base (and all subsequent generations) are contradictory.
-
-    `previous_generations` may be set if the current knowledge base was derived from some
-    other series of clauses using `derive()`.
-
-    >>> derives_false(fset(
-    ...     fset( IRNeg(IRProp('animal', (IRProp('Kim'),))) ),
-    ...     fset( IRNeg(IRProp('dog', (IRVar('x0'),))), IRProp('animal', (IRVar('x0'),)) ),
-    ...     fset( IRNeg(IRProp('cat', (IRVar('x1'),))), IRProp('animal', (IRVar('x1'),)) ),
-    ...     fset( IRProp('dog', (IRProp('Kim'),)) ),
-    ... ))
-    True
-
-    >>> derives_false(fset(
-    ...     fset( IRNeg(IRProp('animal', (IRProp('Kim'),))) ),
-    ...     fset( IRNeg(IRProp('dog', (IRVar('x0'),))), IRProp('animal', (IRVar('x0'),)) ),
-    ...     fset( IRNeg(IRProp('cat', (IRVar('x1'),))), IRProp('animal', (IRVar('x1'),)) ),
-    ... ))
-    False
-    """
-    if false_clause in knowledge_base:
-        return True
-    elif len(knowledge_base) == 0:
-        return False
-    else:
-        return derives_false(
-            next_generation(previous_generations, knowledge_base),
-            knowledge_base | previous_generations
-        )
-
-
-if __name__ == '__main__':
-    import doctest
-    doctest.testmod()
\ No newline at end of file
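    A small sketch of these pieces composed into an observable loop (the same
    recursion as `derives_false`, with `FSet` from emis_funky_funktions assumed,
    as throughout this module):

        def trace_generations(kb: KnowledgeBase) -> None:
            seen: KnowledgeBase = FSet()
            gen = kb
            while len(gen) and false_clause not in gen:
                print(f'{len(gen)} new clause(s)')
                seen, gen = seen | gen, next_generation(seen, gen)
            print('derives false' if false_clause in gen else 'saturated: no contradiction')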
diff --git a/tokens.py b/tokens.py
deleted file mode 100644
index 897501d..0000000
--- a/tokens.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from enum import auto, IntEnum
-from typing import Collection, Tuple
-from re import compile, Pattern
-
-class Tok(IntEnum):
-    """
-    All possible tokens used in the grammar
-    """
-    Newline = auto()
-    Whitespace = auto()
-    PredicateSection = auto()
-    VariablesSection = auto()
-    ConstantsSection = auto()
-    FunctionsSection = auto()
-    ClausesSection = auto()
-    Negate = auto()
-    OpenP = auto()
-    CloseP = auto()
-    Comma = auto()
-    Identifier = auto()
-    Eof = auto()
-
-    def __repr__(self):
-        return self._name_
-
-LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
-    (compile(r"\n"), Tok.Newline),
-    (compile(r"[ \t]+"), Tok.Whitespace),
-    (compile("Predicates:"), Tok.PredicateSection),
-    (compile("Variables:"), Tok.VariablesSection),
-    (compile("Constants:"), Tok.ConstantsSection),
-    (compile("Functions:"), Tok.FunctionsSection),
-    (compile("Clauses:"), Tok.ClausesSection),
-    (compile("!"), Tok.Negate),
-    (compile(r"\("), Tok.OpenP),
-    (compile(r"\)"), Tok.CloseP),
-    (compile(","), Tok.Comma),
-    (compile(r"\w+"), Tok.Identifier),
-]
-"""
-A mapping of regexes to the tokens they identify
-
-Tokens earlier on in the list should be regarded as higher priority, even if a match lower
-on the list also matches. All unicode strings should be matched by at least one token.
-"""
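    Finally, the punchline of the commit title: with the input format now JSON,
    everything this patch deletes is presumably on its way to being replaced by
    the standard library, along the lines of:

        import json

        with open('input.json') as f:      # hypothetical path
            data = json.load(f)            # lexing + parsing + tree-building in one call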