"""
A grammar for parsing CNF files

If this module is run directly in python, it lexes and parses `sample.cnf` with the grammar
defined here and prints the resulting clauses (or a description of any error).  To generate
the oracle table for this grammar, use `build_oracle.sh`, which will build a whole python
module containing the oracle table, complete with imports.
"""
from emis_funky_funktions import *

from dataclasses import dataclass
from enum import auto, IntEnum
from re import compile, Pattern

from lex import Lexeme
from parse import Action

from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, TypeAlias

class Tok(IntEnum):
    """
    Every terminal symbol (kind of token) that the grammar can contain

    Members receive consecutive integer values, in declaration order, from `auto()`.
    """
    # Layout
    Newline = auto()
    Whitespace = auto()

    # Section headers
    PredicateSection = auto()
    VariablesSection = auto()
    ConstantsSection = auto()
    FunctionsSection = auto()
    ClausesSection = auto()

    # Term syntax
    Negate = auto()
    OpenP = auto()
    CloseP = auto()
    Comma = auto()
    Identifier = auto()

    # End of input; it has no pattern in LEX_TABLE
    Eof = auto()

    def __repr__(self) -> str:
        # Show only the bare member name, e.g. `Newline`
        return self.name

LEX_TABLE: Collection[Tuple[Pattern[str], Tok]] = [
    (compile(regex), token)
    for regex, token in (
        # Layout
        (r"\n", Tok.Newline),
        (r"[ \t]+", Tok.Whitespace),
        # Section headers: listed ahead of Identifier, whose `\w+` would
        # otherwise also match the leading word of each header
        ("Predicates:", Tok.PredicateSection),
        ("Variables:", Tok.VariablesSection),
        ("Constants:", Tok.ConstantsSection),
        ("Functions:", Tok.FunctionsSection),
        ("Clauses:", Tok.ClausesSection),
        # Term syntax
        ("!", Tok.Negate),
        (r"\(", Tok.OpenP),
        (r"\)", Tok.CloseP),
        (",", Tok.Comma),
        (r"\w+", Tok.Identifier),
    )
]
"""
A mapping of regexes to the tokens they identify

Tokens earlier on in the list should be regarded as higher priority, even if a match lower
on the list also matches.  All unicode strings should be matched by at least one token.
"""

class Variable(IntEnum):
    """
    Every non-terminal symbol (grammar variable) used on either side of a production

    Members receive consecutive integer values, in declaration order, from `auto()`.
    """
    Start = auto()
    Idents = auto()
    Clauses = auto()
    Clauses_ = auto()
    Clause = auto()
    Clause_ = auto()
    Term = auto()
    Func = auto()
    CSTerms = auto()

    def __repr__(self) -> str:
        # Angle brackets set non-terminals apart from tokens, e.g. `<Start>`
        return f'<{self.name}>'

# A term as it appears in the AST: either a negation or an identifier with a
# (possibly empty) argument list.  Written as a string because both classes are
# defined further down in the module.
ASTTerm: TypeAlias = 'ASTNegated | ASTProp'

@dataclass(frozen=True)
class ArgumentsForVariable:
    """
    Semantic error: an identifier listed as a variable was written with arguments

    Produced by `ASTProp.make_ir`.
    """
    # The lexeme of the offending identifier
    term: Lexeme[Tok]

@dataclass(frozen=True)
class UnidentifiedVariable:
    """
    Semantic error: an identifier was found in neither of the name lists it was checked against

    Produced by `ASTProp.make_ir`.
    """
    # The lexeme of the unknown identifier
    term: Lexeme[Tok]

# The semantic errors that lowering an AST term to IR (the `make_ir` methods) can report
GenIrError: TypeAlias = ArgumentsForVariable | UnidentifiedVariable

@dataclass(frozen=True)
class ASTNegated:
    """
    AST node for a negated term, i.e. `!` followed by another term
    """
    term: ASTTerm

    def make_ir(self, props: Sequence[str], var: Sequence[str]) -> 'Result[IRTerm, GenIrError]':
        """
        Lower the wrapped term to IR, then wrap a successful result in `IRNeg`

        `props` and `var` are forwarded unchanged to the wrapped term's `make_ir`.
        """
        inner_ir = self.term.make_ir(props, var)
        return map_res(IRNeg, inner_ir)

@dataclass(frozen=True)
class ASTProp:
    """
    AST node for an identifier together with its (possibly empty) argument list
    """
    ident: Lexeme[Tok]
    arguments: Sequence[ASTTerm]

    def make_ir(self, props: Sequence[str], vars: Sequence[str]) -> 'Result[IRTerm, GenIrError]':
        """
        Lower this node to IR, classifying the identifier by name

        A name found in `props` becomes an `IRProp` over the lowered arguments (`props` is
        checked first, so it wins if a name is in both lists).  A name found only in `vars`
        becomes an `IRVar`, unless arguments were given, which is an `ArgumentsForVariable`
        error.  A name found in neither list is an `UnidentifiedVariable` error.
        """
        name = self.ident.matched_string

        if name in props:
            lowered_args = sequence([arg.make_ir(props, vars) for arg in self.arguments])
            return map_res(p(IRProp, self.ident), lowered_args)

        if name not in vars:
            return Err(UnidentifiedVariable(self.ident))

        # Variables never take arguments
        if len(self.arguments) != 0:
            return Err(ArgumentsForVariable(self.ident))
        return Ok(IRVar(self.ident))

@dataclass(frozen=True)
class IRProp:
    """
    IR node for a named proposition/function applied to zero or more argument terms
    """
    lexeme: Lexeme[Tok]
    arguments: 'Sequence[IRTerm]'

    def __str__(self) -> str:
        # Rendered as `name(arg1,arg2,...)`; with no arguments this is `name()`
        name = self.lexeme.matched_string
        rendered_args = ",".join(map(str, self.arguments))
        return f'{name}({rendered_args})'

@dataclass(frozen=True)
class IRVar:
    """
    IR node for a variable reference
    """
    lexeme: Lexeme[Tok]

    def __str__(self) -> str:
        # Variables are marked with a leading `*`
        name = self.lexeme.matched_string
        return f'*{name}'

@dataclass(frozen=True)
class IRNeg:
    """
    IR node for the negation of another IR term
    """
    inner: 'IRTerm'

    def __str__(self) -> str:
        # Rendered as `¬` directly followed by the negated term
        negated = self.inner
        return f'¬{negated}'

# Any node of the intermediate representation produced by the `make_ir` methods
IRTerm: TypeAlias = IRVar | IRProp | IRNeg

def make_ir(
    predicate_idents: Sequence[Lexeme[Tok]],
    variable_idents: Sequence[Lexeme[Tok]],
    const_idents: Sequence[Lexeme[Tok]],
    func_idents: Sequence[Lexeme[Tok]],
    clauses: Sequence[Sequence[ASTTerm]],
) -> Result[Sequence[Sequence[IRTerm]], GenIrError]:
    """
    Lower every parsed clause to IR, given the identifiers declared in each section

    Constants, functions and predicates are pooled into a single list of "proposition"
    names; variables are kept separately.  Each term of each clause is lowered with its own
    `make_ir` method against those two lists.

    NOTE(review): `sequence` presumably merges a list of Results into one Result that holds
    either every value or an error -- confirm against emis_funky_funktions.
    """
    known_props = [
        lexeme.matched_string
        for group in (const_idents, func_idents, predicate_idents)
        for lexeme in group
    ]
    known_vars = [lexeme.matched_string for lexeme in variable_idents]

    lowered_clauses = []
    for clause in clauses:
        lowered_terms = [term.make_ir(known_props, known_vars) for term in clause]
        lowered_clauses.append(sequence(lowered_terms))
    return sequence(lowered_clauses)

def cons(stack: Sequence[Any]) -> Sequence[Any]:
    match stack:
        case [rest, head, *popped_stack]:
            return ((head, *rest), *popped_stack)
        case bad_stack:
            raise Exception("Unexpected stack state!", bad_stack)

# The empty sequence: the starting point for lists built up with `cons`
nil: Sequence[Any] = ()
@cur2
def introduce(cons: Any, stack: Sequence[Any]) -> Sequence[Any]:
    """
    Push the value `cons` onto the top (index 0) of the stack

    NOTE(review): `cur2` presumably curries this, so `introduce(value)` gives a one-argument
    stack action (this is how GRAMMAR uses it) -- confirm against emis_funky_funktions.
    """
    pushed = (cons,)
    return pushed + tuple(stack)

def f_apply(stack: Sequence[Any]) -> Sequence[Any]:
    match stack:
        case [arg, func, *popped_stack] if hasattr(func, '__call__'):
            return (func(arg), *popped_stack)
    raise Exception("Unexpected stack state!", stack)
@cur2
def call_func(func: Callable[[Any], Any], stack: Sequence[Any]) -> Sequence[Any]:
    match stack:
        case [arg, *popped_stack]:
            return (func(arg), *popped_stack)
        case bad_stack:
            raise Exception("Unexpected stack state!", bad_stack)

def drop(stack: Sequence[Any]) -> Sequence[Any]:
    """
    Discard the top (index 0) entry of the stack

    An empty stack is returned unchanged (as an empty sequence) rather than raising.
    """
    remaining = stack[1:]
    return remaining

# The production rules of the grammar, one (variable, right-hand side) pair per rule.
# A right-hand side mixes tokens and variables to be matched with actions (plain
# functions from stack to stack) to be run at that point of the rule.
#
# NOTE(review): the action helpers treat index 0 as the top of the stack; presumably the
# parser pushes the lexeme of each matched token there before the next action runs, which
# is why punctuation/keyword tokens are immediately followed by `drop` -- confirm against
# the `parse` module.
GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
	# NOTE(review): `p(p,p,p,p,make_ir)` presumably builds nested partial applications so
	# that `make_ir` receives its five arguments one at a time: the predicate identifiers
	# via `call_func`, then one more argument from each following `f_apply`, in the order
	# variables, constants, functions, clauses -- confirm `p` in emis_funky_funktions.
	(Variable.Start,
		 [ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,p,make_ir)), Tok.Newline, drop
		 , Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
		 , Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
		 , Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
		 , Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop] ),

	# A run of identifiers, collected into a sequence by `cons` starting from `nil`
	(Variable.Idents,
		[ Tok.Identifier, Variable.Idents, cons ]),
	(Variable.Idents,
		[ introduce(nil) ]),

	# Clauses are each preceded by a newline; a newline followed by nothing is allowed
	(Variable.Clauses,
		[ Tok.Newline, drop, Variable.Clauses_ ]),
	(Variable.Clauses,
		[ introduce(nil) ]),

	(Variable.Clauses_,
		[ Variable.Clause, Variable.Clauses, cons ]),
	(Variable.Clauses_,
		[ introduce(nil) ]),

	# A clause is one or more terms written one after another
	(Variable.Clause,
		[ Variable.Term, Variable.Clause_, cons ]),

	(Variable.Clause_,
		[ Variable.Clause ]),
	(Variable.Clause_,
		[ introduce(nil) ]),

	# A term is a negated term, or an identifier with an optional argument list.  For the
	# identifier form, `cur2(ASTProp)` is called with the identifier's lexeme and `f_apply`
	# then supplies the argument sequence left on the stack by <Func>.
	(Variable.Term,
		[ Tok.Negate, drop, Variable.Term, call_func(ASTNegated) ]),
	(Variable.Term,
		[ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),

	# Optional parenthesised argument list; when absent the argument sequence is `nil`
	(Variable.Func,
		[ Tok.OpenP, drop, Variable.Term, Variable.CSTerms, cons, Tok.CloseP, drop ]),
	(Variable.Func,
		[ introduce(nil) ]),

	# Zero or more further arguments, each preceded by a comma
	(Variable.CSTerms,
		[ Tok.Comma, drop, Variable.Term, Variable.CSTerms, cons ]),
	(Variable.CSTerms,
		[ introduce(nil) ]),
]
"""
Implements the following grammar:

Start	 := PredicateSection <Idents> Newline
			VariablesSection <Idents> Newline
			ConstantsSection <Idents> Newline
			FunctionsSection <Idents> Newline
			ClausesSection <Clauses> Eof

Idents	 := Identifier <Idents>
		 := ε

Clauses  := Newline <Clauses'>
		 := ε

Clauses' := <Clause> <Clauses>
		 := ε

Clause	 := <Term> <Clause'>

Clause'  := <Clause>
		 := ε

Term	 := Negate <Term>
		 := Identifier <Func?>

Func?	 := OpenP <Term> <CSTerms> CloseP
		 := ε

CSTerms  := Comma <Term> <CSTerms>
		 := ε
"""

if __name__ == '__main__':
    # from emis_funky_funktions import cur2, flip
	# from build_oracle import print_oracle_table_enum, oracle_table
	# print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
    from build_oracle import oracle_table
    from parse import parser
    from lex import tokenize

    with open('sample.cnf') as file:
        lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))

        oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
        parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
        maybe_ast = parser_(lexemes)

        match maybe_ast:
            case Ok([Ok(ast)]):
                print('\n'.join(' or '.join(str(t) for t in c) for c in ast))
            case Ok([Err(ArgumentsForVariable(v))]):
                print(f'Semantic error: Arguments listed for variable {repr(v.matched_string)} on line {v.line}:{v.col_start}-{v.col_end}')
            case Ok([Err(UnidentifiedVariable(v))]):
                print(f'Semantic error: Unidentified identifier {repr(v.matched_string)} on line {v.line}:{v.col_start}-{v.col_end}')
            case Ok(huh):
                print('Unexpected end result: ', huh)
            case Err((Lexeme(token, text, line, col_start, col_end), expected)):
                print(f'Parse error!  Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')