59 lines
2 KiB
Python
59 lines
2 KiB
Python
|
from emis_funky_funktions import *
|
||
|
|
||
|
from typing import AbstractSet, FrozenSet, TypeAlias, TypeGuard, TypeVar
|
||
|
|
||
|
Lexeme = TypeVar('Lexeme')
|
||
|
Token = TypeVar('Token')
|
||
|
Variable = TypeVar('Variable')
|
||
|
|
||
|
Handle: TypeAlias = Sequence[Variable | Token]
|
||
|
Production: TypeAlias = Tuple[Variable, Handle[Variable, Token]]
|
||
|
Grammar: TypeAlias = Sequence[Production[Variable, Token]]
|
||
|
|
||
|
NfaState: TypeAlias = Tuple[int, int]
|
||
|
Nfa: TypeAlias = Callable[[NfaState, Variable | Token], FrozenSet[NfaState]]
|
||
|
|
||
|
DfaState: TypeAlias = FrozenSet(Tuple[int, int])
|
||
|
Dfa: TypeAlias = Callable[[DfaState, Variable | Token], FrozenSet[NfaState]]
|
||
|
|
||
|
def build_nfa(
    is_var: Callable[[Variable | Token], TypeGuard[Variable]],
    grammar: Grammar[Variable, Token],
) -> Nfa[Variable, Token]:
    """Build the transition function of an NFA over positions in `grammar`.

    A state is a pair ``(production_no, symbol_no)``: an index into `grammar`
    and a position inside that production's handle.  Consuming the symbol
    found at that position moves to ``(production_no, symbol_no + 1)``; the
    result is then expanded with every production whose variable is the
    symbol now waiting to be consumed (repeated until nothing new appears).

    Args:
        is_var: Predicate telling variables apart from tokens.
        grammar: Sequence of ``(variable, handle)`` productions.

    Returns:
        A function ``nfa(state, symbol)`` returning the frozen set of states
        reachable from `state` on `symbol`.  The set is empty when `symbol`
        is not the next symbol of the state's production, or when the state
        already sits at the end of its handle.
    """

    def epsilon_closure_step(state: NfaState) -> FrozenSet[NfaState]:
        """Return `state` plus the start states it can reach without input."""
        production_no, symbol_no = state
        _, handle = grammar[production_no]

        # A state at the end of its handle has no next symbol to expand;
        # indexing the handle here would raise IndexError.
        if symbol_no >= len(handle):
            return fset(state)

        next_symbol = handle[symbol_no]
        if not is_var(next_symbol):
            return fset(state)

        # Every production defining the upcoming variable may start here.
        start_states = (
            (i, 0)
            for i, (variable, _) in enumerate(grammar)
            if variable == next_symbol
        )
        return fset(state, *start_states)

    def epsilon_closure(states: FrozenSet[NfaState]) -> FrozenSet[NfaState]:
        """Expand `states` with `epsilon_closure_step` until a fixed point."""
        closure = states
        frontier = states
        while frontier:
            reached = FSet(
                new_state
                for old_state in frontier
                for new_state in epsilon_closure_step(old_state)
            )
            # Only states not seen before need expanding on the next round.
            frontier = reached - closure
            closure = closure | frontier
        return closure

    def nfa(state: NfaState, symbol: Variable | Token) -> FrozenSet[NfaState]:
        """Return the states reachable from `state` by consuming `symbol`."""
        production_no, symbol_no = state
        # Unpack the production: positions index into the handle, not into
        # the (variable, handle) pair itself.
        _, handle = grammar[production_no]

        if symbol_no < len(handle) and handle[symbol_no] == symbol:
            return epsilon_closure(fset((production_no, symbol_no + 1)))
        return fset()

    # NOTE(review): `dfa` is defined but never returned or called within this
    # function; it is kept in case it is work in progress -- confirm intent.
    def dfa(dstate: DfaState, symbol: Variable | Token) -> DfaState:
        """Return the union of `nfa` transitions over every state in `dstate`."""
        return FSet(
            new_nstate
            for nstate in dstate
            for new_nstate in nfa(nstate, symbol)
        )

    return nfa
|