2023-03-04 17:47:56 +00:00
"""
A grammar for parsing CNF files
If this module is run directly in python , it will spit out valid python which produces an
oracle table for the grammar it defines . It ' s recommended that this be done using
` build_oracle . sh ` instead , however , which will build a whole python module containing the
oracle table , complete with imports .
"""
2023-03-05 03:02:16 +00:00
from emis_funky_funktions import *
from dataclasses import dataclass
2023-03-04 17:42:43 +00:00
from enum import auto , IntEnum
from re import compile , Pattern
2023-03-05 03:02:16 +00:00
from lex import Lexeme
from parse import Action
from typing import Any , Callable , Collection , Mapping , Sequence , Tuple , TypeAlias
2023-03-04 17:42:43 +00:00
class Tok ( IntEnum ) :
"""
All possible tokens used in the grammar
"""
Newline = auto ( )
Whitespace = auto ( )
PredicateSection = auto ( )
VariablesSection = auto ( )
ConstantsSection = auto ( )
FunctionsSection = auto ( )
ClausesSection = auto ( )
Negate = auto ( )
OpenP = auto ( )
CloseP = auto ( )
Comma = auto ( )
Identifier = auto ( )
Eof = auto ( )
def __repr__ ( self ) :
return self . _name_
LEX_TABLE : Collection [ Tuple [ Pattern [ str ] , Tok ] ] = [
( compile ( r " \ n " ) , Tok . Newline ) ,
( compile ( r " [ \ t]+ " ) , Tok . Whitespace ) ,
( compile ( " Predicates: " ) , Tok . PredicateSection ) ,
( compile ( " Variables: " ) , Tok . VariablesSection ) ,
( compile ( " Constants: " ) , Tok . ConstantsSection ) ,
( compile ( " Functions: " ) , Tok . FunctionsSection ) ,
( compile ( " Clauses: " ) , Tok . ClausesSection ) ,
( compile ( " ! " ) , Tok . Negate ) ,
( compile ( r " \ ( " ) , Tok . OpenP ) ,
( compile ( r " \ ) " ) , Tok . CloseP ) ,
( compile ( " , " ) , Tok . Comma ) ,
( compile ( r " \ w+ " ) , Tok . Identifier ) ,
]
"""
A mapping of regexs to the tokens the identify
Tokens earlier on in the list should be regarded as higher priority , even if a match lower
on the list also matches . All unicode strings should be matched by at least one token .
"""
class Variable ( IntEnum ) :
Start = auto ( )
Idents = auto ( )
Clauses = auto ( )
Clauses_ = auto ( )
Clause = auto ( )
Clause_ = auto ( )
Term = auto ( )
Func = auto ( )
CSTerms = auto ( )
def __repr__ ( self ) - > str :
return f ' < { self . _name_ } > '
2023-03-05 03:02:16 +00:00
ASTTerm : TypeAlias = ' ASTNegated | ASTProp '
2023-03-05 21:00:07 +00:00
class IdentKind ( IntEnum ) :
Function = auto ( )
Constant = auto ( )
Variable = auto ( )
Predicate = auto ( )
@dataclass ( frozen = True )
class CallingNonFunc :
term : Lexeme [ Tok ]
obj_type : IdentKind
def __str__ ( self ) :
return f ' Semantic error: Attempted to call { repr ( self . term . matched_string ) } (a { self . obj_type . name . lower ( ) } ) with arguments on line { self . term . line } : { self . term . col_start } - { self . term . col_end } '
2023-03-05 04:43:26 +00:00
@dataclass ( frozen = True )
2023-03-05 21:00:07 +00:00
class MissingArguments :
term : Lexeme [ Tok ]
def __str__ ( self ) :
return f ' Semantic error: The function { repr ( self . term . matched_string ) } on line { self . term . line } : { self . term . col_start } - { self . term . col_end } is missing arguments! '
2023-03-05 04:43:26 +00:00
@dataclass ( frozen = True )
class UnidentifiedVariable :
2023-03-05 21:00:07 +00:00
term : Lexeme [ Tok ]
def __str__ ( self ) :
return f ' Semantic error: Unidentified identifier { repr ( self . term . matched_string ) } on line { self . term . line } : { self . term . col_start } - { self . term . col_end } '
@dataclass ( frozen = True )
class PropUsedInObjectContext :
term : Lexeme [ Tok ]
def __str__ ( self ) :
return f ' Semantic error: The proposition { repr ( self . term . matched_string ) } was used in a context where an object was expected on line { self . term . line } : { self . term . col_start } - { self . term . col_end } '
@dataclass ( frozen = True )
class ObjectUsedInPropContext :
term : Lexeme [ Tok ]
obj_type : IdentKind
def __str__ ( self ) :
return f ' Semantic error: The { self . obj_type . name . lower ( ) } { repr ( self . term . matched_string ) } was used in a context where a proposition was expected on line { self . term . line } : { self . term . col_start } - { self . term . col_end } '
@dataclass ( frozen = True )
class NegationOfObject :
line : int
col : int
def __str__ ( self ) :
return f ' Semantic error: Attempted to use negation in a context where working on objects on line { self . line } : { self . col } '
2023-03-05 04:43:26 +00:00
2023-03-05 21:00:07 +00:00
GenIrError : TypeAlias = CallingNonFunc | MissingArguments | UnidentifiedVariable | PropUsedInObjectContext | ObjectUsedInPropContext | NegationOfObject
@dataclass ( frozen = True )
class IdentBindings :
predicate_idents : Sequence [ str ]
variable_idents : Sequence [ str ]
const_idents : Sequence [ str ]
func_idents : Sequence [ str ]
2023-03-05 04:43:26 +00:00
2023-03-05 03:02:16 +00:00
@dataclass ( frozen = True )
class ASTNegated :
2023-03-05 21:00:07 +00:00
neg_lexeme : Lexeme [ Tok ]
term : ASTTerm
2023-03-05 03:02:16 +00:00
2023-03-05 21:00:07 +00:00
def make_ir ( self , idents : IdentBindings , is_prop : bool ) - > ' Result[IRTerm, GenIrError] ' :
if is_prop :
return map_res ( IRNeg , self . term . make_ir ( idents , True ) )
else :
return Err ( NegationOfObject ( self . neg_lexeme . line , self . neg_lexeme . col_start ) )
2023-03-05 03:02:16 +00:00
@dataclass ( frozen = True )
class ASTProp :
2023-03-05 21:00:07 +00:00
ident : Lexeme [ Tok ]
arguments : Sequence [ ASTTerm ]
def make_ir ( self , idents : IdentBindings , is_pred : bool ) - > ' Result[IRTerm, GenIrError] ' :
bound_type = (
IdentKind . Predicate
if self . ident . matched_string in idents . predicate_idents else
IdentKind . Variable
if self . ident . matched_string in idents . variable_idents else
IdentKind . Constant
if self . ident . matched_string in idents . const_idents else
IdentKind . Function
if self . ident . matched_string in idents . func_idents else
None
)
if bound_type is None :
return Err ( UnidentifiedVariable ( self . ident ) )
if is_pred :
if bound_type != IdentKind . Predicate :
return Err ( ObjectUsedInPropContext ( self . ident , bound_type ) )
else :
if bound_type == IdentKind . Function :
if not len ( self . arguments ) :
return Err ( MissingArguments ( self . ident ) )
elif bound_type == IdentKind . Predicate :
return Err ( PropUsedInObjectContext ( self . ident ) )
else :
if len ( self . arguments ) :
return Err ( CallingNonFunc ( self . ident , bound_type ) )
if bound_type == IdentKind . Variable :
return Ok ( IRVar ( self . ident ) )
else :
arg_ir = sequence ( [ t . make_ir ( idents , False ) for t in self . arguments ] )
return map_res ( p ( IRProp , self . ident ) , arg_ir )
2023-03-05 03:02:16 +00:00
@dataclass ( frozen = True )
2023-03-05 04:13:25 +00:00
class IRProp :
lexeme : Lexeme [ Tok ]
arguments : ' Sequence[IRTerm] '
def __str__ ( self ) - > str :
return f ' { self . lexeme . matched_string } ( { " , " . join ( str ( arg ) for arg in self . arguments ) } ) '
2023-03-05 03:02:16 +00:00
2023-03-05 04:13:25 +00:00
@dataclass ( frozen = True )
class IRVar :
lexeme : Lexeme [ Tok ]
2023-03-05 03:02:16 +00:00
def __str__ ( self ) - > str :
2023-03-05 04:13:25 +00:00
return f ' * { self . lexeme . matched_string } '
@dataclass ( frozen = True )
class IRNeg :
inner : ' IRTerm '
def __str__ ( self ) - > str :
return f ' ¬ { self . inner } '
IRTerm : TypeAlias = IRVar | IRProp | IRNeg
2023-03-05 21:00:07 +00:00
@cur2
2023-03-05 04:13:25 +00:00
def make_ir (
2023-03-05 21:00:07 +00:00
idents : IdentBindings ,
2023-03-05 04:13:25 +00:00
clauses : Sequence [ Sequence [ ASTTerm ] ] ,
2023-03-05 04:43:26 +00:00
) - > Result [ Sequence [ Sequence [ IRTerm ] ] , GenIrError ] :
2023-03-05 21:00:07 +00:00
return sequence ( [ sequence ( [ term . make_ir ( idents , True ) for term in clause ] ) for clause in clauses ] )
2023-03-05 03:02:16 +00:00
def cons ( stack : Sequence [ Any ] ) - > Sequence [ Any ] :
match stack :
case [ rest , head , * popped_stack ] :
return ( ( head , * rest ) , * popped_stack )
case bad_stack :
raise Exception ( " Unexpected stack state! " , bad_stack )
nil : Sequence [ Any ] = tuple ( )
@cur2
def introduce (
cons : Any ,
stack : Sequence [ Any ]
) - > Sequence [ Any ] :
return ( cons , * stack )
def f_apply ( stack : Sequence [ Any ] ) - > Sequence [ Any ] :
match stack :
case [ arg , func , * popped_stack ] if hasattr ( func , ' __call__ ' ) :
return ( func ( arg ) , * popped_stack )
raise Exception ( " Unexpected stack state! " , stack )
@cur2
def call_func ( func : Callable [ [ Any ] , Any ] , stack : Sequence [ Any ] ) - > Sequence [ Any ] :
match stack :
case [ arg , * popped_stack ] :
return ( func ( arg ) , * popped_stack )
case bad_stack :
raise Exception ( " Unexpected stack state! " , bad_stack )
def drop ( stack : Sequence [ Any ] ) - > Sequence [ Any ] :
return stack [ 1 : ]
GRAMMAR : Sequence [ Tuple [ Variable , Sequence [ Variable | Tok | Action ] ] ] = [
2023-03-04 17:42:43 +00:00
( Variable . Start ,
2023-03-05 21:00:07 +00:00
[ Tok . PredicateSection , drop , Variable . Idents , call_func ( p ( p , p , p , IdentBindings ) ) , Tok . Newline , drop
2023-03-05 03:02:16 +00:00
, Tok . VariablesSection , drop , Variable . Idents , f_apply , Tok . Newline , drop
, Tok . ConstantsSection , drop , Variable . Idents , f_apply , Tok . Newline , drop
2023-03-05 21:00:07 +00:00
, Tok . FunctionsSection , drop , Variable . Idents , f_apply , call_func ( make_ir ) , Tok . Newline , drop
2023-03-05 03:02:16 +00:00
, Tok . ClausesSection , drop , Variable . Clauses , f_apply , Tok . Eof , drop ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Idents ,
2023-03-05 21:00:07 +00:00
[ Tok . Identifier , call_func ( lambda i : i . matched_string ) , Variable . Idents , cons ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Idents ,
2023-03-05 03:02:16 +00:00
[ introduce ( nil ) ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Clauses ,
2023-03-05 03:02:16 +00:00
[ Tok . Newline , drop , Variable . Clauses_ ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Clauses ,
2023-03-05 03:02:16 +00:00
[ introduce ( nil ) ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Clauses_ ,
2023-03-05 03:02:16 +00:00
[ Variable . Clause , Variable . Clauses , cons ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Clauses_ ,
2023-03-05 03:02:16 +00:00
[ introduce ( nil ) ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Clause ,
2023-03-05 03:02:16 +00:00
[ Variable . Term , Variable . Clause_ , cons ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Clause_ ,
[ Variable . Clause ] ) ,
( Variable . Clause_ ,
2023-03-05 03:02:16 +00:00
[ introduce ( nil ) ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Term ,
2023-03-05 21:00:07 +00:00
[ Tok . Negate , call_func ( cur2 ( ASTNegated ) ) , Variable . Term , f_apply ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Term ,
2023-03-05 03:02:16 +00:00
[ Tok . Identifier , call_func ( cur2 ( ASTProp ) ) , Variable . Func , f_apply ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Func ,
2023-03-05 03:02:16 +00:00
[ Tok . OpenP , drop , Variable . Term , Variable . CSTerms , cons , Tok . CloseP , drop ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . Func ,
2023-03-05 03:02:16 +00:00
[ introduce ( nil ) ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . CSTerms ,
2023-03-05 03:02:16 +00:00
[ Tok . Comma , drop , Variable . Term , Variable . CSTerms , cons ] ) ,
2023-03-04 17:42:43 +00:00
( Variable . CSTerms ,
2023-03-05 03:02:16 +00:00
[ introduce ( nil ) ] ) ,
2023-03-04 17:42:43 +00:00
]
"""
Implements the following grammar :
Start := PredicateSection < Idents > Newline
VariablesSection < Idents > Newline
ConstantsSection < Idents > Newline
FunctionsSection < Idents > Newline
ClausesSection < Clauses > Eof
Idents := Identifier < Idents >
:= ε
Clauses := Newline < Clauses ' >
:= ε
Clauses ' := <Clause> <Clauses>
:= ε
Clause := < Term > < Clause ' >
Clause ' := <Clause>
:= ε
Term := Negate < Term >
:= Identifier < Func ? >
Func ? := OpenP < Term > < CSTerms > CloseP
:= ε
CSTerms := Comma < Term > < CSTerms >
:= ε
2023-03-04 17:47:56 +00:00
"""
if __name__ == ' __main__ ' :
2023-03-05 21:00:07 +00:00
# from emis_funky_funktions import cur2, flip
2023-03-05 03:02:16 +00:00
# from build_oracle import print_oracle_table_enum, oracle_table
# print(print_oracle_table_enum(oracle_table(flip(cur2(isinstance))(Tok), GRAMMAR))) #type: ignore
2023-03-05 21:00:07 +00:00
from build_oracle import oracle_table
from parse import parser
from lex import tokenize
with open ( ' sample.cnf ' ) as file :
lexemes = unwrap_r ( tokenize ( LEX_TABLE , [ Tok . Whitespace ] , Tok . Eof , file . read ( ) ) )
oracle_table_ = oracle_table ( p_instance ( Tok ) , p_instance ( Variable ) , GRAMMAR ) #type:ignore
parser_ = parser ( oracle_table_ , flip ( cur2 ( getattr ) ) ( ' token ' ) , Variable . Start )
maybe_ast = parser_ ( lexemes )
match maybe_ast :
case Ok ( [ Ok ( ast ) ] ) :
print ( ' \n ' . join ( ' or ' . join ( str ( t ) for t in c ) for c in ast ) )
case Ok ( [ Err ( err ) ] ) :
print ( err )
case Ok ( huh ) :
print ( ' Unexpected end result: ' , huh )
case Err ( ( Lexeme ( token , text , line , col_start , col_end ) , expected ) ) :
print ( f ' Parse error! Line { line } : { col_start } - { col_end } \n \n Got: { repr ( text ) } \n Expected: { expected } ' )