From 98253bb2547e232f77aeac229bcd2440a8e809dd Mon Sep 17 00:00:00 2001
From: Emi Simpson
Date: Sun, 5 Mar 2023 16:00:07 -0500
Subject: [PATCH] Souped up error checking

---
 grammar.py | 165 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 115 insertions(+), 50 deletions(-)

diff --git a/grammar.py b/grammar.py
index 7a1235d..29b897c 100644
--- a/grammar.py
+++ b/grammar.py
@@ -75,38 +75,109 @@ class Variable(IntEnum):
 
 ASTTerm: TypeAlias = 'ASTNegated | ASTProp'
 
+class IdentKind(IntEnum):
+	Function = auto()
+	Constant = auto()
+	Variable = auto()
+	Predicate = auto()
+
 @dataclass(frozen=True)
-class ArgumentsForVariable:
-	term: Lexeme[Tok]
+class CallingNonFunc:
+	term: Lexeme[Tok]
+	obj_type: IdentKind
+	def __str__(self):
+		return f'Semantic error: Attempted to call {repr(self.term.matched_string)} (a {self.obj_type.name.lower()}) with arguments on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'
+
+@dataclass(frozen=True)
+class MissingArguments:
+	term: Lexeme[Tok]
+	def __str__(self):
+		return f'Semantic error: The function {repr(self.term.matched_string)} on line {self.term.line}:{self.term.col_start}-{self.term.col_end} is missing arguments!'
 
 @dataclass(frozen=True)
 class UnidentifiedVariable:
-	term: Lexeme[Tok]
+	term: Lexeme[Tok]
+	def __str__(self):
+		return f'Semantic error: Unidentified identifier {repr(self.term.matched_string)} on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'
 
-GenIrError: TypeAlias = ArgumentsForVariable | UnidentifiedVariable
+@dataclass(frozen=True)
+class PropUsedInObjectContext:
+	term: Lexeme[Tok]
+	def __str__(self):
+		return f'Semantic error: The proposition {repr(self.term.matched_string)} was used in a context where an object was expected on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'
+
+@dataclass(frozen=True)
+class ObjectUsedInPropContext:
+	term: Lexeme[Tok]
+	obj_type: IdentKind
+	def __str__(self):
+		return f'Semantic error: The {self.obj_type.name.lower()} {repr(self.term.matched_string)} was used in a context where a proposition was expected on line {self.term.line}:{self.term.col_start}-{self.term.col_end}'
+
+@dataclass(frozen=True)
+class NegationOfObject:
+	line: int
+	col: int
+	def __str__(self):
+		return f'Semantic error: Attempted to use negation in a context where objects are expected on line {self.line}:{self.col}'
+
+GenIrError: TypeAlias = CallingNonFunc | MissingArguments | UnidentifiedVariable | PropUsedInObjectContext | ObjectUsedInPropContext | NegationOfObject
+
+@dataclass(frozen=True)
+class IdentBindings:
+	predicate_idents: Sequence[str]
+	variable_idents: Sequence[str]
+	const_idents: Sequence[str]
+	func_idents: Sequence[str]
 
 @dataclass(frozen=True)
 class ASTNegated:
-	term: ASTTerm
+	neg_lexeme: Lexeme[Tok]
+	term: ASTTerm
 
-	def make_ir(self, props: Sequence[str], var: Sequence[str]) -> 'Result[IRTerm, GenIrError]':
-		return map_res(IRNeg, self.term.make_ir(props, var))
+	def make_ir(self, idents: IdentBindings, is_prop: bool) -> 'Result[IRTerm, GenIrError]':
+		if is_prop:
+			return map_res(IRNeg, self.term.make_ir(idents, True))
+		else:
+			return Err(NegationOfObject(self.neg_lexeme.line, self.neg_lexeme.col_start))
 
 @dataclass(frozen=True)
 class ASTProp:
-	ident: Lexeme[Tok]
-	arguments: Sequence[ASTTerm]
+	ident: Lexeme[Tok]
+	arguments: Sequence[ASTTerm]
 
-	def make_ir(self, props: Sequence[str], vars: Sequence[str]) -> 'Result[IRTerm, GenIrError]':
-		if self.ident.matched_string in props:
-			return map_res(p(IRProp, self.ident), sequence([t.make_ir(props, vars) for t in self.arguments]))
-		elif self.ident.matched_string in vars:
-			if len(self.arguments):
-				return Err(ArgumentsForVariable(self.ident))
-			else:
-				return Ok(IRVar(self.ident))
-		else:
-			return Err(UnidentifiedVariable(self.ident))
+	def make_ir(self, idents: IdentBindings, is_pred: bool) -> 'Result[IRTerm, GenIrError]':
+		bound_type = (
+			IdentKind.Predicate
+			if self.ident.matched_string in idents.predicate_idents else
+			IdentKind.Variable
+			if self.ident.matched_string in idents.variable_idents else
+			IdentKind.Constant
+			if self.ident.matched_string in idents.const_idents else
+			IdentKind.Function
+			if self.ident.matched_string in idents.func_idents else
+			None
+		)
+		if bound_type is None:
+			return Err(UnidentifiedVariable(self.ident))
+
+		if is_pred:
+			if bound_type != IdentKind.Predicate:
+				return Err(ObjectUsedInPropContext(self.ident, bound_type))
+		else:
+			if bound_type == IdentKind.Function:
+				if not len(self.arguments):
+					return Err(MissingArguments(self.ident))
+			elif bound_type == IdentKind.Predicate:
+				return Err(PropUsedInObjectContext(self.ident))
+			else:
+				if len(self.arguments):
+					return Err(CallingNonFunc(self.ident, bound_type))
+
+		if bound_type == IdentKind.Variable:
+			return Ok(IRVar(self.ident))
+		else:
+			arg_ir = sequence([t.make_ir(idents, False) for t in self.arguments])
+			return map_res(p(IRProp, self.ident), arg_ir)
 
 @dataclass(frozen=True)
 class IRProp:
@@ -129,16 +200,12 @@ class IRNeg:
 
 IRTerm: TypeAlias = IRVar | IRProp | IRNeg
 
+@cur2
 def make_ir(
-	predicate_idents: Sequence[Lexeme[Tok]],
-	variable_idents: Sequence[Lexeme[Tok]],
-	const_idents: Sequence[Lexeme[Tok]],
-	func_idents: Sequence[Lexeme[Tok]],
+	idents: IdentBindings,
 	clauses: Sequence[Sequence[ASTTerm]],
 ) -> Result[Sequence[Sequence[IRTerm]], GenIrError]:
-	prop_idents = [l.matched_string for l in (*const_idents, *func_idents, *predicate_idents)]
-	var_idents = [l.matched_string for l in variable_idents]
-	return sequence([sequence([term.make_ir(prop_idents, var_idents) for term in clause]) for clause in clauses])
+	return sequence([sequence([term.make_ir(idents, True) for term in clause]) for clause in clauses])
 
 def cons(stack: Sequence[Any]) -> Sequence[Any]:
 	match stack:
@@ -173,14 +240,14 @@ def drop(stack: Sequence[Any]) -> Sequence[Any]:
 
 GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
 	(Variable.Start,
-		[ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,p,make_ir)), Tok.Newline, drop
+		[ Tok.PredicateSection, drop, Variable.Idents, call_func(p(p,p,p,IdentBindings)), Tok.Newline, drop
 		, Tok.VariablesSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
 		, Tok.ConstantsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
-		, Tok.FunctionsSection, drop, Variable.Idents, f_apply, Tok.Newline, drop
+		, Tok.FunctionsSection, drop, Variable.Idents, f_apply, call_func(make_ir), Tok.Newline, drop
 		, Tok.ClausesSection, drop, Variable.Clauses, f_apply, Tok.Eof, drop]
 	),
 
 	(Variable.Idents,
-		[ Tok.Identifier, Variable.Idents, cons ]),
+		[ Tok.Identifier, call_func(lambda i: i.matched_string), Variable.Idents, cons ]),
 	(Variable.Idents,
 		[ introduce(nil) ]),
@@ -203,7 +270,7 @@ GRAMMAR: Sequence[Tuple[Variable, Sequence[Variable | Tok | Action]]] = [
 		[ introduce(nil) ]),
 
 	(Variable.Term,
-		[ Tok.Negate, drop, Variable.Term, call_func(ASTNegated) ]),
+		[ Tok.Negate, call_func(cur2(ASTNegated)), Variable.Term, f_apply ]),
 
 	(Variable.Term,
 		[ Tok.Identifier, call_func(cur2(ASTProp)), Variable.Func, f_apply ]),
@@ -251,28 +318,26 @@ CSTerms := Comma
 """
 
 if __name__ == '__main__':
-	# from emis_funky_funktions import cur2, flip
+	# from emis_funky_funktions import cur2, flip
 	# from build_oracle import print_oracle_table_enum, oracle_table
 	# print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
-	from build_oracle import oracle_table
-	from parse import parser
-	from lex import tokenize
+	from build_oracle import oracle_table
+	from parse import parser
+	from lex import tokenize
 
-	with open('sample.cnf') as file:
-		lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
+	with open('sample.cnf') as file:
+		lexemes = unwrap_r(tokenize(LEX_TABLE, [Tok.Whitespace], Tok.Eof, file.read()))
 
-	oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
-	parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
-	maybe_ast = parser_(lexemes)
+	oracle_table_ = oracle_table(p_instance(Tok), p_instance(Variable), GRAMMAR) #type:ignore
+	parser_ = parser(oracle_table_, flip(cur2(getattr))('token'), Variable.Start)
+	maybe_ast = parser_(lexemes)
 
-	match maybe_ast:
-		case Ok([Ok(ast)]):
-			print('\n'.join(' or '.join(str(t) for t in c) for c in ast))
-		case Ok([Err(ArgumentsForVariable(v))]):
-			print(f'Semantic error: Arguments listed for variable {repr(v.matched_string)} on line {v.line}:{v.col_start}-{v.col_end}')
-		case Ok([Err(UnidentifiedVariable(v))]):
-			print(f'Semantic error: Unidentified identifier {repr(v.matched_string)} on line {v.line}:{v.col_start}-{v.col_end}')
-		case Ok(huh):
-			print('Unexpected end result: ', huh)
-		case Err((Lexeme(token, text, line, col_start, col_end), expected)):
-			print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
\ No newline at end of file
+	match maybe_ast:
+		case Ok([Ok(ast)]):
+			print('\n'.join(' or '.join(str(t) for t in c) for c in ast))
+		case Ok([Err(err)]):
+			print(err)
+		case Ok(huh):
+			print('Unexpected end result: ', huh)
+		case Err((Lexeme(token, text, line, col_start, col_end), expected)):
+			print(f'Parse error! Line {line}:{col_start}-{col_end}\n\nGot: {repr(text)}\nExpected: {expected}')
\ No newline at end of file
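
A quick illustration of the new error reporting, as a minimal sketch rather than part of the patch: it assumes Lexeme and Tok are importable from lex, and that Lexeme's positional fields are (token, matched_string, line, col_start, col_end), matching the destructuring in the Err match arm above.

    # Hypothetical demo of the new semantic errors; import paths are assumptions.
    from lex import Lexeme, Tok
    from grammar import CallingNonFunc, IdentKind

    # 'x' was declared as a variable but is applied to arguments, e.g. 'x(a)'.
    bad_call = CallingNonFunc(Lexeme(Tok.Identifier, 'x', 3, 5, 6), IdentKind.Variable)
    print(bad_call)
    # Semantic error: Attempted to call 'x' (a variable) with arguments on line 3:5-6

Because every GenIrError variant now carries its own __str__, the __main__ driver can print any Err result uniformly instead of pattern-matching each error type as it did before.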