Push
parent 5cf30b5c71
commit 6ec8a1ec6c
ex/arrays.calc (new file, 35 lines)
@@ -0,0 +1,35 @@
uint8 = 2
uint16 = 4
uint32 = 8
uint64 = 16

arrMake(size, bytesize) = malloc(size*bytesize)

arrGet(arr, index, bytesize) =
    reduce(
        (x, y) = x + (y<<8),
        slice(arr, index*bytesize, index*bytesize+bytesize)
    )

arrSet(arr, index, bytesize, value) =
    index=index*bytesize;
    map(
        (x) = set(arr, index+x, value >> (x<<3) & 0xff),
        range(0, bytesize)
    );
    0
arrMap(arr, func, bytesize) =
    map(
        (x) = func(arrGet(arr, x, bytesize)),
        range(0, len(arr)//bytesize)
    );
    0

mem = arrMake(4, 4)
arrSet(mem, 0, 4, 32754)
arrSet(mem, 1, 4, 167)
arrSet(mem, 3, 4, 12765)
println(mem)
println(arrGet(mem, 0, 4))
arrMap(mem, (x)=print(x, " "), 4)
println()
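Note: arrGet above folds the slice with (x, y) = x + (y<<8), which shifts every
incoming byte by only 8 bits, so values wider than 16 bits do not round-trip;
the demo sticks to values below 65536. For comparison, a minimal Python sketch
of fully positional little-endian packing (pack/unpack are illustrative names,
not part of this commit):

def pack(value, bytesize):
    # byte i of value, little-endian: one byte per memory cell
    return [(value >> (8 * i)) & 0xFF for i in range(bytesize)]

def unpack(cells):
    # fold back, starting from the most significant cell
    result = 0
    for byte in reversed(cells):
        result = (result << 8) | byte
    return result

assert unpack(pack(32754, 4)) == 32754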
ex/breakinggates.calc (new file, 16 lines)
@@ -0,0 +1,16 @@
src = input("1: ")
modified = input("2: ")

count(string, char) =
    len(filter(
        (x) = x==char,
        string
    ))

extra = 0
map(
    (x) =
        if(count(src, x) != count(modified, x), () = extra=x),
    modified
)
println(extra)
ex/fizzbuzz.calc (new file, 21 lines)
@@ -0,0 +1,21 @@
limit_raw = input("Limit: ")
limit = 0;
get(map(
        (x) =
            limit = (limit*10) + (x-48),
        limit_raw
    ),
    len(limit_raw)-1
)

map(
    (x) =
        branch(
            x % 15 == 0, () = print("fizzbuzz "),
            x % 3 == 0, () = print("fizz "),
            x % 5 == 0, () = print("buzz "),
            () = print(x, " ")
        ),
    range(1, limit+1)
)
println()
ex/turingmachine.calc (new file, 46 lines)
@@ -0,0 +1,46 @@
rulesMap(func, rules) =
    map(
        (x) = func(slice(rules, x, x+5)),
        range(0, len(rules), 5)
    )

step(tape, rules, state, position) =
    char = get(tape, position);
    selectedRule = 0;
    rulesMap(
        (rule) = if(
            (get(rule, 0) == state) + (get(rule, 1) == char) == 2,
            () = selectedRule = rule
        ),
        rules
    );
    set(tape, position, get(selectedRule, 3));
    moveHead = get(selectedRule, 4);
    branch(
        moveHead == 2,
        () = position = position + 1,
        moveHead == 1,
        () = position = position - 1,
    );
    mallocfor(get(selectedRule, 2), position)


rules = mallocfor(
    1, ord("0"), 1, ord("1"), 2,
    1, ord("1"), 1, ord("0"), 2,
    1, ord("*"), 2, ord("*"), 1
)
tape = map(ord, "010011001*")
println("tape was: ", strjoin("", map(chr, tape)))

state = 1
position = 0
while(
    () = state != 2,
    () =
        result = step(tape, rules, state, position);
        state = get(result, 0);
        position = get(result, 1)
)

println("tape became: ", strjoin("", map(chr, tape)))
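Note: each rule in the flat rules array occupies five cells: current state,
read symbol, next state, symbol to write, and head move (2 = right, 1 = left);
step mutates the tape in place and returns the new state and position through
mallocfor. A minimal Python sketch of the same single step (tm_step is an
illustrative name, not part of this commit):

def tm_step(tape, rules, state, position):
    # rules is a flat list; each rule is 5 consecutive cells
    char = tape[position]
    rule = next(r for r in (rules[i:i+5] for i in range(0, len(rules), 5))
                if r[0] == state and r[1] == char)
    tape[position] = rule[3]               # write symbol
    position += 1 if rule[4] == 2 else -1  # move: 2 = right, 1 = left
    return rule[2], position               # new (state, position)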
src/__init__.py (new file, empty)
src/interpreter/interpret.py (new file, 270 lines)
@@ -0,0 +1,270 @@
import operator
from functools import reduce
from abc import ABC, abstractmethod
from typing import Optional, Tuple, Union, List

from src.lex import tokenizer as _tokenizer
from src.stack import builder
from src.tokentypes.tokens import Token, Tokens, Function
from src.tokentypes.types import (TokenKind, TokenType, Stack, Namespace, Number,
                                  NamespaceValue, ArgumentsError, NameNotFoundError,
                                  InvalidSyntaxError, ExternalFunctionError,
                                  PyCalcError, NoCodeError)


Value = Union[Number, Function]


class NamespaceStack(Stack[dict]):
    def add_namespaces(self, *namespaces: Namespace):
        for namespace in namespaces:
            self.append(namespace)

    def add_namespace(self, namespace: Namespace):
        self.append(namespace)

    def with_add_namespace(self, namespace: Namespace) -> "NamespaceStack":
        self.add_namespace(namespace)
        return self

    def get(self, var: str) -> NamespaceValue:
        for namespace in self[::-1]:
            if var in namespace:
                return namespace[var]

        raise NameNotFoundError(var, (-1, -1))

    def set(self, key: str, value: NamespaceValue):
        for namespace in self[::-1]:
            if key in namespace:
                namespace[key] = value
                break
        else:
            self.top[key] = value

    def copy(self) -> "NamespaceStack":
        return NamespaceStack(super().copy())

    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.pop()


class ABCInterpreter(ABC):
    @abstractmethod
    def interpret(self, code: str, namespace: Namespace) -> Value:
        """
        Receives an expression as a string and a base namespace.
        The namespace is put at the bottom of the namespaces stack.
        Returns the last element left on the stack (if more than one
        remains, InvalidSyntaxError is raised)
        """


class Interpreter(ABCInterpreter):
    unary_executors = {
        TokenType.UN_POS: operator.pos,
        TokenType.UN_NEG: operator.neg,
    }
    executors = {
        TokenType.OP_ADD: operator.add,
        TokenType.OP_SUB: operator.sub,

        TokenType.OP_DIV: operator.truediv,
        TokenType.OP_FLOORDIV: operator.floordiv,  # it's me!
        TokenType.OP_MUL: operator.mul,
        TokenType.OP_MOD: operator.mod,
        TokenType.OP_LSHIFT: operator.lshift,
        TokenType.OP_RSHIFT: operator.rshift,
        TokenType.OP_BITWISE_AND: operator.and_,
        TokenType.OP_BITWISE_OR: operator.or_,
        TokenType.OP_BITWISE_XOR: operator.xor,

        TokenType.OP_DOT: getattr,
        TokenType.OP_EQEQ: operator.eq,
        TokenType.OP_NOTEQ: operator.ne,
        TokenType.OP_GT: operator.gt,
        TokenType.OP_GE: operator.ge,
        TokenType.OP_LT: operator.lt,
        TokenType.OP_LE: operator.le,

        TokenType.OP_POW: operator.pow,
    }

    def __init__(self,
                 tokenizer: Optional[_tokenizer.ABCTokenizer] = None,
                 stackbuilder: Optional[builder.ABCBuilder] = None,
                 ):
        self.tokenizer = tokenizer or _tokenizer.Tokenizer()
        self.stackbuilder = stackbuilder or builder.SortingStationBuilder()

    def interpret(self, code: str, namespace: Namespace) -> Value:
        """
        Interprets the given code and returns the value of its last
        expression
        """

        tokens = self.tokenizer.tokenize(code)
        stacks = self.stackbuilder.build(tokens)
        namespaces = NamespaceStack()
        namespaces.add_namespaces(namespace, {})

        return self._interpreter(stacks, namespaces)

    def _interpreter(self, exprs: List[Stack[Token]], namespaces: NamespaceStack) -> Value:
        if not exprs:
            raise NoCodeError

        return list(map(lambda expr: self._interpret_line(expr, namespaces), exprs))[-1]

    def _interpret_line(self, expression: Stack[Token], namespaces: NamespaceStack) -> Value:
        stack: Stack[Token] = Stack()

        for i, token in enumerate(expression):
            if token.kind in (TokenKind.NUMBER, TokenKind.STRING) \
                    or token.type == TokenType.IDENTIFIER:
                stack.append(token)
            elif token.type == TokenType.VAR:
                try:
                    stack.append(self._token(namespaces.get(token.value), token.pos))
                except NameNotFoundError as exc:
                    raise NameNotFoundError(str(exc), token.pos) from None

            elif token.kind == TokenKind.UNARY_OPERATOR:
                stack.append(self._token(
                    self.unary_executors[token.type](stack.pop().value),  # noqa
                    token.pos
                ))

            elif token.type == TokenType.OP_SEMICOLON:
                if len(stack) > 1:
                    raise InvalidSyntaxError("multiple values left in stack", token.pos)

                stack.pop()
            elif token.type == TokenType.OP_EQ:
                right, left = stack.pop(), stack.pop()
                namespaces.set(left.value, right.value)
                stack.append(right)

            elif token.kind == TokenKind.OPERATOR:
                right, left = stack.pop(), stack.pop()
                stack.append(self._token(
                    self.executors[token.type](left.value, right.value),
                    token.pos
                ))
            elif token.type == TokenType.FUNCCALL:
                try:
                    func = namespaces.get(token.value.name)
                except NameNotFoundError as exc:
                    raise NameNotFoundError(str(exc), token.pos) from None

                stack, args = self._get_func_args(token.value.argscount, stack)

                try:
                    call_result = func(*(arg.value for arg in args))
                except ArgumentsError as exc:
                    raise ArgumentsError(str(exc), token.pos) from None
                except PyCalcError as exc:
                    raise exc from None
                except Exception as exc:
                    raise ExternalFunctionError(str(exc), token.pos)

                stack.append(self._token(call_result, token.pos))
            elif token.type == TokenType.FUNCDEF:
                func = self._spawn_function(
                    namespace=namespaces.copy(),
                    name=token.value.name,
                    fargs=[tok.value for tok in token.value.args],
                    body=token.value.body
                )

                if token.value.name:
                    namespaces.set(token.value.name, func)

                stack.append(Token(
                    kind=TokenKind.FUNC,
                    typeof=TokenType.FUNC,
                    value=func,
                    pos=token.pos
                ))
            else:
                raise InvalidSyntaxError(
                    f"unknown token: {token.type.name}({token.value})",
                    token.pos
                )

        result = stack.pop()

        if stack:
            raise InvalidSyntaxError("multiple values left in stack", stack[0].pos)

        return result.value

    def _spawn_function(self,
                        namespace: NamespaceStack,
                        name: str,
                        fargs: List[str],
                        body: Stack[Token]) -> Function:
        def real_function(*args) -> Number:
            if not fargs and args:
                raise ArgumentsError("function takes no arguments", (-1, -1))
            elif len(fargs) != len(args):
                text = (
                    "not enough arguments",
                    "too many arguments"
                )[len(fargs) < len(args)]

                raise ArgumentsError(
                    f"{text}: expected {len(fargs)}, got {len(args)}",
                    (-1, -1)
                )

            args_namespace = self._get_args_namespace(fargs, args)

            with namespace.with_add_namespace(args_namespace):
                return self._interpret_line(body, namespace)

        return Function(
            name=f"{name or '<lambda>'}({','.join(fargs)})",
            target=real_function
        )

    @staticmethod
    def _token(num: Number, pos: Tuple[int, int]) -> Token:
        if isinstance(num, int):
            return Token(
                kind=TokenKind.NUMBER,
                typeof=TokenType.INTEGER,
                value=int(num),
                pos=pos
            )
        elif isinstance(num, float):
            return Token(
                kind=TokenKind.NUMBER,
                typeof=TokenType.FLOAT,
                value=num,
                pos=pos
            )
        else:
            return Token(
                kind=TokenKind.OTHER,
                typeof=TokenType.OTHER,
                value=num,
                pos=pos
            )

    @staticmethod
    def _get_func_args(argscount: int, stack: Stack[Token]) -> Tuple[Stack[Token], Tokens]:
        if not argscount:
            return stack, []

        return stack[:-argscount], stack[-argscount:]

    @staticmethod
    def _get_args_namespace(fargs, args) -> Namespace:
        return dict(zip(fargs, args))

    @staticmethod
    def _merge_namespaces(*namespaces: Namespace):
        return reduce(lambda a, b: {**a, **b}, namespaces)
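Note: driving the interpreter matches what the test suite below does:
construct an Interpreter and hand it source code plus a base namespace
(stdnamespace is defined in std/stdlibrary.py in this same commit). Both
expressions and their expected values are taken from tests/testcases.py:

from src.interpreter.interpret import Interpreter
from std.stdlibrary import stdnamespace

interp = Interpreter()
print(interp.interpret("2+2*2", stdnamespace))                 # 6
print(interp.interpret("f(x,y)=x*y \n f(2,5)", stdnamespace))  # 10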
src/lex/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from . import tokenizer
src/lex/tokenizer.py (new file, 527 lines)
@@ -0,0 +1,527 @@
import enum
import string
from functools import reduce
from abc import ABC, abstractmethod
from typing import List, Iterator, Tuple, Callable

from src.tokentypes.tokens import Lexeme, Lexemes, Token, Tokens, FuncDef
from src.tokentypes.types import (LexemeType, TokenType, TokenKind, OPERATORS_TABLE,
                                  OPERATORS_CHARS, UNARY_OPERATORS, Stack,
                                  InvalidSyntaxError)


class _LexerState(enum.IntEnum):
    ANY = 1
    OPERATOR = 2
    NOT_OPERATOR = 3
    STRING = 4
    STRING_BACKSLASH = 5


class _ParserState(enum.IntEnum):
    ARG = 1
    ARG_COMMA = 3
    EQ = 5
    FUNCNAME = 6
    OTHER = 7


class ABCTokenizer(ABC):
    @abstractmethod
    def tokenize(self, data: str) -> List[Tokens]:
        ...


def tokenize(data: str) -> List[Tokens]:
    return Tokenizer().tokenize(data)


class Tokenizer(ABCTokenizer):
    def tokenize(self, data: str) -> List[Tokens]:
        if not data:
            return []

        lexemes: Lexemes = []
        lineno = 0

        for element, is_op, pos in self._lex(data):
            if is_op:
                op, unaries = self._parse_ops(element, lineno, pos)
                lexemes.append(op)
                lexemes.extend(unaries)
            else:
                if element == "\n":
                    lineno += 1

                lexemes.append(self._parse_lexeme(element, lineno, pos))

        output: List[Tokens] = []

        for line in self._split_lines(lexemes):
            unary = self._parse_unary(list(map(
                self._lexeme2token, line
            )))
            output.append(self._mark_identifiers(unary))

        return output

    def _lex(self, data: str) -> Iterator[Tuple[str, bool, int]]:
        buff: List[str] = []
        state = _LexerState.ANY
        pos = 0
        lineno = 0

        for i, char in enumerate(data):
            char_state = self._get_lexer_state_for_char(char)

            if state == _LexerState.ANY:
                state = char_state

            if state == _LexerState.STRING:
                if char == "\"" and buff:
                    buff.append(char)
                    yield "".join(buff), False, pos-len(buff)+1
                    buff.clear()
                    state = _LexerState.ANY
                elif char == "\\":
                    state = _LexerState.STRING_BACKSLASH
                    buff.append(char)
                else:
                    buff.append(char)
            elif state == _LexerState.STRING_BACKSLASH:
                buff.append(char)
                state = _LexerState.STRING
            elif char == " ":
                if buff:
                    yield "".join(buff), state == _LexerState.OPERATOR, pos
                    buff.clear()
            elif char == "\n":
                if buff:
                    yield "".join(buff), state == _LexerState.OPERATOR, pos
                    buff.clear()

                state = _LexerState.ANY
                pos = 0
                lineno += 1
                yield "\n", False, pos
            elif char in "()":
                if buff:
                    yield "".join(buff), state == _LexerState.OPERATOR, pos-len(buff)
                    buff.clear()

                yield char, False, pos
            elif char == ".":
                if i == len(data)-1:
                    raise InvalidSyntaxError(
                        "unexpected dot at the end of the expression",
                        (lineno, i)
                    )

                if data[i+1] in string.digits:
                    buff.append(char)
                else:
                    if buff:
                        yield "".join(buff), state == _LexerState.OPERATOR, i-len(buff)
                        buff.clear()

                    yield char, True, i

                state = _LexerState.NOT_OPERATOR
            elif state != char_state:
                if buff:
                    yield "".join(buff), state == _LexerState.OPERATOR, pos
                    buff.clear()

                buff.append(char)
                state = char_state
            else:
                buff.append(char)

            pos += 1

        if buff:
            yield "".join(buff), state == _LexerState.OPERATOR, pos+1

    @staticmethod
    def _get_lexer_state_for_char(char: str) -> _LexerState:
        if char in OPERATORS_CHARS:
            return _LexerState.OPERATOR
        elif char == "\"":
            return _LexerState.STRING

        return _LexerState.NOT_OPERATOR

    @staticmethod
    def _split_lines(lexemes: Lexemes) -> List[Lexemes]:
        output: List[Lexemes] = [[]]
        parens = 0

        for i, lexeme in enumerate(lexemes):
            if lexeme.type == LexemeType.LPAREN:
                parens += 1
                output[-1].append(lexeme)
            elif lexeme.type == LexemeType.RPAREN:
                if not parens:
                    raise InvalidSyntaxError("unexpected closing parenthesis", lexeme.pos)

                parens -= 1
                output[-1].append(lexeme)
            elif lexeme.type == LexemeType.EOL:
                if i > 0 and (lexemes[i-1].type == LexemeType.OPERATOR or parens):
                    continue

                output.append([])
            else:
                output[-1].append(lexeme)

        return list(filter(bool, output))

    def _parse_unary(self, tokens: Tokens) -> Tokens:
        output: Tokens = []
        buffer: Tokens = []

        if tokens[0].kind == TokenKind.OPERATOR:
            for i, token in enumerate(tokens):
                if token.kind != TokenKind.OPERATOR:
                    tokens = tokens[i:]
                    break

                buffer.append(token)

            unary = self._calculate_final_unary(buffer)
            output.append(Token(
                kind=TokenKind.UNARY_OPERATOR,
                typeof=unary,
                value="+" if unary == TokenType.UN_POS else "-",
                pos=(tokens[0].pos[0], 0)
            ))
            buffer.clear()
        else:
            output.append(tokens[0])
            tokens = tokens[1:]

        for i, token in enumerate(tokens):
            if buffer:
                if token.kind == TokenKind.OPERATOR:
                    buffer.append(token)
                else:
                    output.append(buffer[0])

                    if buffer[1:]:
                        unary = self._calculate_final_unary(buffer[1:])
                        output.append(Token(
                            kind=TokenKind.UNARY_OPERATOR,
                            typeof=unary,
                            value="+" if unary == TokenType.UN_POS else "-",
                            pos=buffer[-1].pos
                        ))

                    buffer.clear()
                    output.append(token)
            elif token.kind == TokenKind.OPERATOR:
                buffer.append(token)
            else:
                output.append(token)

        if buffer:
            if len(buffer) == 1 and buffer[0].type == TokenType.OP_SEMICOLON:
                output.append(buffer.pop())
            else:
                raise InvalidSyntaxError(
                    "unexpected operator at the end of the expression",
                    buffer[-1].pos
                )

        return output

    @staticmethod
    def _calculate_final_unary(ops: Tokens) -> TokenType:
        if not ops:
            raise ValueError("_calculate_final_unary(): ops are empty")

        subs = 0

        for i, token in enumerate(ops):
            if token.value not in UNARY_OPERATORS:
                raise InvalidSyntaxError(f"illegal unary: {token.value}", token.pos)

            subs += token.value == '-'

        # an odd number of minuses negates; an even number cancels out
        return TokenType.UN_NEG if subs & 1 else TokenType.UN_POS

    def _parse_ops(self, raw_op: str, lineno: int, pos: int) -> Tuple[Lexeme, Lexemes]:
        """
        Splits a run of operator characters into the actual operator
        and any trailing unary operators
        """

        op_len = len(max(OPERATORS_TABLE.keys(), key=len))

        while op_len > 0:
            op = raw_op[:op_len]

            if op not in OPERATORS_TABLE:
                op_len -= 1
                continue

            oper = self._get_op_lexeme(op, lineno, pos)
            unaries = map(
                lambda op_: self._get_op_lexeme(op_, lineno, pos+op_len),
                raw_op[op_len:]
            )

            return oper, list(unaries)

        raise InvalidSyntaxError(
            f"illegal operator: {raw_op[0]}",
            (lineno, pos)
        )

    def _mark_identifiers(self, tokens: Tokens) -> Tokens:
        output = []
        state = _ParserState.OTHER
        empty_stack = Stack()
        funcdef = FuncDef("", [], empty_stack)
        prev_eq_pos = None

        for i, token in enumerate(tokens[1:]):
            if token.type == TokenType.VAR and tokens[i].type == TokenType.OP_DOT:
                token.type = TokenType.IDENTIFIER

        for i, token in enumerate(tokens[::-1]):
            if state == _ParserState.OTHER:
                if token.type == TokenType.OP_EQ:
                    state = _ParserState.EQ
                    prev_eq_pos = token.pos
                else:
                    output.append(token)
            elif state == _ParserState.EQ:
                if token.type == TokenType.VAR:
                    token.type = TokenType.IDENTIFIER
                    state = _ParserState.OTHER
                    output.append(Token(
                        kind=TokenKind.OPERATOR,
                        typeof=TokenType.OP_EQ,
                        value="=",
                        pos=prev_eq_pos,
                    ))
                    output.append(token)
                elif token.type == TokenType.RPAREN:
                    state = _ParserState.ARG
                else:
                    raise InvalidSyntaxError(
                        f"cannot assign to {repr(token.value)}",
                        token.pos
                    )
            elif state == _ParserState.ARG:
                if token.type == TokenType.OP_COMMA:
                    raise InvalidSyntaxError("double comma", token.pos)
                elif token.type == TokenType.LPAREN:
                    state = _ParserState.FUNCNAME
                    continue
                elif token.type != TokenType.VAR:
                    raise InvalidSyntaxError(
                        f"illegal argument identifier: {repr(token.value)}",
                        token.pos
                    )

                funcdef.args.append(token)
                token.type = TokenType.IDENTIFIER
                state = _ParserState.ARG_COMMA
            elif state == _ParserState.ARG_COMMA:
                if token.type == TokenType.LPAREN:
                    state = _ParserState.FUNCNAME
                elif token.type != TokenType.OP_COMMA:
                    raise InvalidSyntaxError(
                        f"expected comma, got {repr(token.value)}",
                        token.pos
                    )
                else:
                    state = _ParserState.ARG
            elif state == _ParserState.FUNCNAME:
                if token.type not in (TokenType.IDENTIFIER, TokenType.VAR):
                    funcdef.name = ""

                    if token.type == TokenType.OP_EQ:
                        state = _ParserState.EQ
                    else:
                        state = _ParserState.OTHER
                else:
                    funcdef.name = token.value
                    state = _ParserState.OTHER

                line, column = token.pos
                column += bool(funcdef.name)

                funcdef.args.reverse()
                output.append(Token(
                    kind=TokenKind.FUNC,
                    typeof=TokenType.FUNCDEF,
                    value=funcdef,
                    pos=(line, column)
                ))

                if token.type not in (TokenType.IDENTIFIER, TokenType.VAR, TokenType.OP_EQ):
                    output.append(token)

                funcdef = FuncDef("", [], empty_stack)

        return self._fill_funcbodies(output[::-1])

    def _fill_funcbodies(self, tokens: Tokens) -> Tokens:
        output: Tokens = []
        bodybuff = []
        lparens = 0

        for token in tokens:
            if bodybuff:
                if token.type == TokenType.LPAREN:
                    lparens += 1
                    bodybuff.append(token)
                elif lparens and token.type == TokenType.RPAREN:
                    lparens -= 1
                    bodybuff.append(token)
                elif not lparens and token.type in (TokenType.OP_COMMA, TokenType.RPAREN):
                    bodybuff[0].value.body = self._fill_funcbodies(bodybuff[1:])
                    output.append(bodybuff[0])
                    output.append(token)
                    bodybuff.clear()
                else:
                    bodybuff.append(token)
            else:
                if token.type == TokenType.FUNCDEF:
                    bodybuff.append(token)
                else:
                    output.append(token)

        if bodybuff:
            bodybuff[0].value.body = self._fill_funcbodies(bodybuff[1:])
            output.append(bodybuff[0])

        return output

    @staticmethod
    def _get_op_lexeme(op: str, lineno: int, pos: int) -> Lexeme:
        return Lexeme(
            typeof=LexemeType.OPERATOR,
            value=op,
            pos=(lineno, pos)
        )

    def _parse_lexeme(self, raw_lexeme: str, lineno: int, pos: int) -> Lexeme:
        get_lexeme = self._lexeme_getter(raw_lexeme, lineno, pos)

        if raw_lexeme.startswith("0x"):
            if len(raw_lexeme) == 2:
                raise InvalidSyntaxError(
                    "invalid hexadecimal value: 0x",
                    (lineno, pos)
                )

            return get_lexeme(LexemeType.HEXNUMBER)
        elif raw_lexeme[0] in string.digits + ".":
            if len(raw_lexeme) == raw_lexeme.count(".") or raw_lexeme.count(".") > 1:
                raise InvalidSyntaxError(
                    f"invalid float: {raw_lexeme}",
                    (lineno, pos)
                )

            if "." in raw_lexeme:
                return get_lexeme(LexemeType.FLOAT)

            return get_lexeme(LexemeType.NUMBER)
        elif raw_lexeme == "(":
            return get_lexeme(LexemeType.LPAREN)
        elif raw_lexeme == ")":
            return get_lexeme(LexemeType.RPAREN)
        elif raw_lexeme == "\n":
            return get_lexeme(LexemeType.EOL)
        elif raw_lexeme[0] == raw_lexeme[-1] == "\"":
            return get_lexeme(LexemeType.STRING)

        return get_lexeme(LexemeType.LITERAL)

    @staticmethod
    def _lexeme_getter(value: str, lineno: int, pos: int) -> Callable[[LexemeType], Lexeme]:
        def getter(typeof: LexemeType) -> Lexeme:
            return Lexeme(
                typeof=typeof,
                value=value,
                pos=(lineno, pos)
            )

        return getter

    @staticmethod
    def _lexeme2token(lexeme: Lexeme) -> Token:
        parentheses = {
            LexemeType.LPAREN: TokenType.LPAREN,
            LexemeType.RPAREN: TokenType.RPAREN
        }

        if lexeme.type == LexemeType.NUMBER:
            return Token(
                kind=TokenKind.NUMBER,
                typeof=TokenType.INTEGER,
                value=int(lexeme.value),
                pos=lexeme.pos
            )
        elif lexeme.type == LexemeType.FLOAT:
            return Token(
                kind=TokenKind.NUMBER,
                typeof=TokenType.FLOAT,
                value=float(lexeme.value),
                pos=lexeme.pos
            )
        elif lexeme.type == LexemeType.HEXNUMBER:
            return Token(
                kind=TokenKind.NUMBER,
                typeof=TokenType.INTEGER,
                value=int(lexeme.value[2:], 16),
                pos=lexeme.pos
            )
        elif lexeme.type == LexemeType.LITERAL:
            return Token(
                kind=TokenKind.LITERAL,
                typeof=TokenType.VAR,
                value=lexeme.value,
                pos=lexeme.pos
            )
        elif lexeme.type == LexemeType.OPERATOR:
            return Token(
                kind=TokenKind.OPERATOR,
                typeof=OPERATORS_TABLE[lexeme.value],
                value=lexeme.value,
                pos=lexeme.pos
            )
        elif lexeme.type in parentheses:
            return Token(
                kind=TokenKind.PAREN,
                typeof=parentheses[lexeme.type],
                value=lexeme.value,
                pos=lexeme.pos
            )
        elif lexeme.type == LexemeType.STRING:
            return Token(
                kind=TokenKind.STRING,
                typeof=TokenType.STRING,
                value=_prepare_string(lexeme.value[1:-1]),
                pos=lexeme.pos
            )

        raise InvalidSyntaxError("unexpected lexeme type: " + lexeme.type.name, lexeme.pos)


def _prepare_string(string_val: str) -> str:
    replacements = {
        "\\\"": "\"",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\b": "\b",
        "\\f": "\f",
        "\\v": "\v",
        "\\0": "\0",
        "\\\\": "\\"
    }

    return reduce(lambda a, b: a.replace(*b), replacements.items(), string_val)
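Note: the module-level tokenize helper is the convenient entry point; it wraps
Tokenizer and returns one token list per logical line. A small sketch (the
printed form follows Token.__str__ from src/tokentypes/tokens.py; the exact
output is not shown here):

from src.lex.tokenizer import tokenize

for line in tokenize("a = 1 + 2 \n a*2"):
    print(line)  # each line is a list of Token objects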
src/stack/builder.py (new file, 206 lines)
@@ -0,0 +1,206 @@
from abc import ABC, abstractmethod
from typing import Iterator, List, Tuple

from src.tokentypes.tokens import Token, Tokens, Func, FuncDef
from src.tokentypes.types import (PRIORITIES_TABLE, TokenKind, TokenType,
                                  Stack, InvalidSyntaxError, UnknownTokenError)


class ABCBuilder(ABC):
    @abstractmethod
    def build(self, tokens: List[Tokens]) -> List[Stack[Token]]:
        """
        The builder receives tokens directly from the tokenizer. These
        tokens must already be parsed into:
          - Identifiers
          - Variables
          - Unary tokens
          - Function calls and definitions
        """


class SortingStationBuilder(ABCBuilder):
    """
    A reference implementation of the shunting-yard (sorting station)
    algorithm
    """

    def build(self, tokens: List[Tokens]) -> List[Stack[Token]]:
        return list(map(self._build_line, tokens))

    def _build_line(self, tokens: Tokens) -> Stack:
        output: Stack[Token] = Stack()
        divider = self._expr_divider(tokens)

        for expr, semicolon_pos in divider:
            stack: Stack[Token] = Stack()
            args_counters = self._count_args(expr)[::-1]

            for i, token in enumerate(expr):
                if token.kind in (TokenKind.NUMBER, TokenKind.STRING)\
                        or token.type == TokenType.IDENTIFIER:
                    output.append(token)
                elif token.type == TokenType.VAR:
                    if i < len(expr)-1 and expr[i+1].type == TokenType.LPAREN:
                        # it's a function!
                        stack.append(self._get_func(token, args_counters.pop()))
                    else:
                        output.append(token)
                elif token.type == TokenType.FUNCDEF:
                    output.append(Token(
                        kind=token.kind,
                        typeof=token.type,
                        value=FuncDef(
                            name=token.value.name,
                            args=token.value.args,
                            body=self._build_line(token.value.body)
                        ),
                        pos=token.pos
                    ))
                elif token.type == TokenType.OP_COMMA:
                    if not stack:
                        raise InvalidSyntaxError(
                            "missing left parenthesis or comma",
                            token.pos
                        )

                    try:
                        while stack.top.type != TokenType.LPAREN:
                            output.append(stack.pop())
                    except IndexError:
                        raise InvalidSyntaxError(
                            "missing left parenthesis or comma",
                            output[-1].pos
                        ) from None
                elif token.kind in (TokenKind.OPERATOR, TokenKind.UNARY_OPERATOR):
                    priority = PRIORITIES_TABLE
                    token_priority = priority[token.type]

                    while stack and (
                        stack.top.kind in (TokenKind.OPERATOR, TokenKind.UNARY_OPERATOR, TokenKind.FUNC)
                        and
                        token_priority <= priority[stack.top.type]
                        and
                        stack.top.type != TokenType.OP_POW
                    ):
                        output.append(stack.pop())

                    stack.append(token)
                elif token.type == TokenType.LPAREN:
                    stack.append(token)
                elif token.type == TokenType.RPAREN:
                    if not stack:
                        raise InvalidSyntaxError(
                            "missing opening parenthesis",
                            token.pos
                        )

                    try:
                        while stack.top.type != TokenType.LPAREN:
                            output.append(stack.pop())
                    except IndexError:
                        raise InvalidSyntaxError(
                            "missing opening parenthesis",
                            output[-1].pos
                        ) from None

                    stack.pop()

                    if stack and stack.top.type == TokenType.FUNCNAME:
                        # it's a function!
                        output.append(self._get_func(stack.pop(), args_counters.pop()))
                else:
                    raise UnknownTokenError(f"unknown token: {token}", token.pos)

            while stack:
                if stack.top.type == TokenType.LPAREN:
                    raise InvalidSyntaxError("missing closing parenthesis", stack.top.pos)

                output.append(stack.pop())

            output.append(Token(
                kind=TokenKind.OPERATOR,
                typeof=TokenType.OP_SEMICOLON,
                value=";",
                pos=semicolon_pos
            ))

        return output[:-1]  # remove trailing semicolon

    def _count_args(self, tokens: Tokens) -> List[int]:
        result = []

        for funccall in self.__find_funccalls(tokens):
            result.append(0)
            waitforcomma = False
            parens = 0

            for token in funccall:
                if parens:
                    if token.type == TokenType.LPAREN:
                        parens += 1
                    elif token.type == TokenType.RPAREN:
                        parens -= 1

                    continue
                elif token.type == TokenType.LPAREN:
                    parens += 1
                    result[-1] += not waitforcomma
                    waitforcomma = True
                elif waitforcomma:
                    waitforcomma = token.type != TokenType.OP_COMMA
                else:
                    result[-1] += 1
                    waitforcomma = True

        return result

    def __find_funccalls(self, tokens: Tokens) -> List[Tokens]:
        funcs: List[Tokens] = []
        parens = 0

        for i, token in enumerate(tokens[1:], start=1):
            if parens:
                if token.type == TokenType.LPAREN:
                    parens += 1
                elif token.type == TokenType.RPAREN:
                    parens -= 1

                    if not parens:
                        funcs.extend(self.__find_funccalls(funcs[-1]))
                        continue

                funcs[-1].append(token)
            elif token.type == TokenType.LPAREN and tokens[i - 1].type == TokenType.VAR:
                parens = 1
                funcs.append([])

        return funcs

    @staticmethod
    def _expr_divider(expr: Tokens) -> Iterator[Tuple[Tokens, Tuple[int, int]]]:
        """
        Yields each sub-expression together with the position of the
        semicolon that terminates it
        """

        border = 0

        for i, token in enumerate(expr):
            if token.type == TokenType.OP_SEMICOLON:
                yield expr[border:i], token.pos
                border = i + 1

        # a semicolon can never terminate the whole expression; if it
        # does, an error is raised before this function is reached
        yield expr[border:], (-1, -1)

    @staticmethod
    def _get_func(token: Token, argscount: int) -> Token:
        return Token(
            kind=TokenKind.FUNC,
            typeof=TokenType.FUNCCALL,
            value=Func(
                name=token.value,
                argscount=argscount
            ),
            pos=token.pos
        )
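Note: SortingStationBuilder is Dijkstra's shunting-yard algorithm extended
with function calls, commas and semicolons. Stripped down to numbers and
left-associative binary operators, the core loop looks like this (a minimal
sketch of the idea, not the class's actual API):

# minimal shunting-yard: infix token list -> postfix (RPN) list
PRIO = {"+": 1, "-": 1, "*": 2, "/": 2}

def to_rpn(tokens):
    output, stack = [], []
    for tok in tokens:
        if isinstance(tok, (int, float)):
            output.append(tok)
        elif tok == "(":
            stack.append(tok)
        elif tok == ")":
            while stack[-1] != "(":
                output.append(stack.pop())
            stack.pop()  # drop the "("
        else:  # operator: first pop everything with >= priority
            while stack and stack[-1] != "(" and PRIO[stack[-1]] >= PRIO[tok]:
                output.append(stack.pop())
            stack.append(tok)
    while stack:
        output.append(stack.pop())
    return output

assert to_rpn([2, "+", 2, "*", 2]) == [2, 2, 2, "*", "+"]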
src/tokentypes/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from . import tokens, types
src/tokentypes/tokens.py (new file, 90 lines)
@@ -0,0 +1,90 @@
from typing import List, Union, Callable, Tuple

from . import types

Lexemes = List["Lexeme"]
Tokens = List["Token"]
TokenValue = Union[int, float, str, "Func", "FuncDef", "Function"]


class Lexeme:
    """
    A raw piece of the input stream: a number, literal, operator,
    or parenthesis
    """

    def __init__(self, typeof: types.LexemeType, value: str, pos: Tuple[int, int]):
        self.type = typeof
        self.value = value
        self.pos = pos

    def __str__(self):
        return f"{self.type.name}({repr(self.value)})"

    __repr__ = __str__


class Token:
    def __init__(self,
                 kind: types.TokenKind,
                 typeof: types.TokenType,
                 value: TokenValue,
                 pos: Tuple[int, int]
                 ):
        self.kind = kind
        self.type = typeof
        self.value = value
        self.pos = pos

    def __str__(self):
        return f"{self.kind.name}:{self.type.name}:{self.pos[1]}({repr(self.value)})"

    __repr__ = __str__


class Func:
    """
    Func describes a function call site: its name and argument count
    """

    def __init__(self, name: str, argscount: int):
        self.name = name
        self.argscount = argscount

    def __str__(self):
        return f"Func(name={repr(self.name)}, argscount={self.argscount})"

    __repr__ = __str__


class FuncDef:
    """
    FuncDef represents a function definition
    """

    def __init__(self, name: str, args: Tokens, body: types.Stack):
        self.name = name
        self.args = args
        self.body = body

    def __str__(self):
        return f"FuncDef(name={repr(self.name)}, " \
               f"args=({','.join(arg.value for arg in self.args)}), " \
               f"body={self.body})"

    __repr__ = __str__


class Function:
    def __init__(self, name: str, target: Callable):
        self.name = name
        self.target = target

    @property
    def __call__(self):
        return self.target

    def __str__(self):
        return self.name

    __repr__ = __str__
src/tokentypes/types.py (new file, 196 lines)
@@ -0,0 +1,196 @@
import enum
from operator import add
from functools import reduce
from string import ascii_letters
from typing import Union, Dict, Callable, Tuple, List, TypeVar


Number = Union[int, float]
NamespaceValue = Union[Number, Callable]
Namespace = Dict[str, NamespaceValue]

UNARY_OPERATORS = {"+", "-"}
ALLOWED_LITERALS = ascii_letters + "_"

T = TypeVar("T")


class Stack(List[T]):
    @property
    def top(self):
        return self[-1]


class LexemeType(enum.IntEnum):
    UNKNOWN = 0
    NUMBER = 1
    HEXNUMBER = 2
    FLOAT = 3
    LITERAL = 4
    OPERATOR = 5
    LPAREN = 6  # (
    RPAREN = 7  # )
    DOT = 8
    COMMA = 9
    EOL = 10
    STRING = 11


class TokenKind(enum.IntEnum):
    NUMBER = 0
    LITERAL = 1
    OPERATOR = 2
    UNARY_OPERATOR = 3
    PAREN = 4
    FUNC = 5
    STRING = 6
    OTHER = 7


class TokenType(enum.IntEnum):
    FLOAT = 0
    INTEGER = 1
    OP_EQ = 2
    OP_EQEQ = 3
    OP_NOTEQ = 4
    OP_ADD = 5
    OP_SUB = 6
    OP_DIV = 7
    OP_MUL = 8
    OP_POW = 9
    OP_LSHIFT = 10
    OP_RSHIFT = 11
    OP_BITWISE_AND = 12
    OP_BITWISE_OR = 13
    OP_BITWISE_XOR = 14
    OP_MOD = 15
    OP_FLOORDIV = 16
    OP_SEMICOLON = 17
    OP_COMMA = 18
    OP_GT = 19
    OP_GE = 20
    OP_LT = 21
    OP_LE = 22
    OP_DOT = 35
    UN_POS = 23
    UN_NEG = 24
    LPAREN = 25
    RPAREN = 26
    VAR = 27
    IDENTIFIER = 28
    FUNCCALL = 29
    FUNCDEF = 30
    FUNCNAME = 31
    FUNC = 32
    STRING = 33
    OTHER = 34


OPERATORS_TABLE = {
    "+": TokenType.OP_ADD,
    "-": TokenType.OP_SUB,
    "/": TokenType.OP_DIV,
    "//": TokenType.OP_FLOORDIV,
    "*": TokenType.OP_MUL,
    "**": TokenType.OP_POW,
    "%": TokenType.OP_MOD,
    "<<": TokenType.OP_LSHIFT,
    ">>": TokenType.OP_RSHIFT,
    "&": TokenType.OP_BITWISE_AND,
    "|": TokenType.OP_BITWISE_OR,
    "^": TokenType.OP_BITWISE_XOR,

    "==": TokenType.OP_EQEQ,
    "!=": TokenType.OP_NOTEQ,
    ">": TokenType.OP_GT,
    ">=": TokenType.OP_GE,
    "<": TokenType.OP_LT,
    "<=": TokenType.OP_LE,

    ".": TokenType.OP_DOT,

    ";": TokenType.OP_SEMICOLON,
    "=": TokenType.OP_EQ,
    ",": TokenType.OP_COMMA
}

OPERATORS_CHARS = set(reduce(add, OPERATORS_TABLE.keys()))


class Priorities(enum.IntEnum):
    NONE = 0
    MINIMAL = 1
    MEDIUM = 2
    HIGH = 3
    MAXIMAL = 4


PRIORITIES_TABLE = {
    TokenType.OP_ADD: Priorities.MINIMAL,
    TokenType.OP_SUB: Priorities.MINIMAL,

    TokenType.OP_DIV: Priorities.MEDIUM,
    TokenType.OP_FLOORDIV: Priorities.MEDIUM,
    TokenType.OP_MUL: Priorities.MEDIUM,
    TokenType.OP_MOD: Priorities.MEDIUM,
    TokenType.OP_LSHIFT: Priorities.MEDIUM,
    TokenType.OP_RSHIFT: Priorities.MEDIUM,
    TokenType.OP_BITWISE_AND: Priorities.MEDIUM,
    TokenType.OP_BITWISE_OR: Priorities.MEDIUM,
    TokenType.OP_BITWISE_XOR: Priorities.MEDIUM,

    TokenType.UN_POS: Priorities.HIGH,
    TokenType.UN_NEG: Priorities.HIGH,

    TokenType.OP_POW: Priorities.MAXIMAL,
    TokenType.FUNCCALL: Priorities.MAXIMAL,
    TokenType.FUNCDEF: Priorities.MAXIMAL,
    TokenType.OP_DOT: Priorities.MAXIMAL,

    TokenType.OP_EQ: Priorities.NONE,
    TokenType.OP_EQEQ: Priorities.NONE,
    TokenType.OP_NOTEQ: Priorities.NONE,
    TokenType.OP_GT: Priorities.NONE,
    TokenType.OP_GE: Priorities.NONE,
    TokenType.OP_LT: Priorities.NONE,
    TokenType.OP_LE: Priorities.NONE,
    TokenType.OP_COMMA: Priorities.NONE,
    TokenType.OP_SEMICOLON: Priorities.NONE,
}


class PyCalcError(Exception):
    def __init__(self, message: str, pos: Tuple[int, int] = (-1, -1)):
        self.message = message
        self.pos = pos
        super().__init__(message)


# note: the extra attributes below default to empty values because most
# call sites raise these errors with just (message, pos)
class InvalidSyntaxError(PyCalcError):
    def __init__(self, message: str, pos: Tuple[int, int], offending_token: str = ""):
        self.offending_token = offending_token
        super().__init__(message, pos)


class ArgumentsError(PyCalcError):
    def __init__(self, message: str, pos: Tuple[int, int] = (-1, -1),
                 function_name: str = "", expected_args: int = -1,
                 received_args: int = -1):
        self.function_name = function_name
        self.expected_args = expected_args
        self.received_args = received_args
        super().__init__(message, pos)


class NameNotFoundError(PyCalcError):
    def __init__(self, message: str, pos: Tuple[int, int], variable_name: str = ""):
        self.variable_name = variable_name
        super().__init__(message, pos)


class UnknownTokenError(PyCalcError):
    def __init__(self, message: str, pos: Tuple[int, int], token: str = ""):
        self.token = token
        super().__init__(message, pos)


class ExternalFunctionError(PyCalcError):
    def __init__(self, message: str, pos: Tuple[int, int], function_name: str = ""):
        self.function_name = function_name
        super().__init__(message, pos)


class NoCodeError(Exception):
    pass
std/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from . import stdlibrary
std/stdio.py (new file, 25 lines)
@@ -0,0 +1,25 @@
from typing import List


def print_(*values) -> int:
    print(*values, sep="", end="")

    return 0


def println_(*values) -> int:
    print(*values, sep="", end="\n")

    return 0


def print_mem(mem: List) -> int:
    print(*mem, sep="", end="")

    return 0


def println_mem(mem: List) -> int:
    print(*mem, sep="", end="\n")

    return 0
std/stdlibrary.py (new file, 50 lines)
@@ -0,0 +1,50 @@
from math import pi
from functools import reduce
from typing import Callable, Iterable

from . import stdmem, stdstatements, stdio


def _as_list(func: Callable) -> Callable[[Callable, Iterable], list]:
    def decorator(a: Callable, b: Iterable) -> list:
        return list(func(a, b))

    return decorator


stdnamespace = {
    "rt": lambda a, b: a ** (1/b),
    "sqrt": lambda a: a ** (1/2),
    "cbrt": lambda a: a ** (1/3),
    "int": int,
    "float": float,
    "str": str,
    "strjoin": str.join,
    "range": range,
    "inv": lambda a: ~a,
    "pi": pi,

    "write": lambda target, value: target.write(value),
    "print": stdio.print_,
    "println": stdio.println_,
    "input": input,
    "chr": chr,
    "ord": ord,

    "malloc": stdmem.mem_alloc,
    "mallocfor": stdmem.mem_allocfor,
    "get": stdmem.mem_get,
    "set": stdmem.mem_set,
    "slice": stdmem.slice_,
    "len": len,

    "map": _as_list(map),
    "filter": _as_list(filter),
    "reduce": reduce,
    "while": stdstatements.while_,
    "if": stdstatements.if_else,
    "branch": stdstatements.branch,

    "nop": lambda: 0,
    "call": lambda func: func(),
}
std/stdmem.py (new file, 28 lines)
@@ -0,0 +1,28 @@
from typing import List


def mem_alloc(size: int) -> List[int]:
    return [0] * size


def mem_allocfor(*values: int) -> List[int]:
    return list(values)


def mem_get(mem: List[int], offset: int) -> int:
    if offset < 0 or offset >= len(mem):
        return -1

    return mem[offset]


def mem_set(mem: List[int], offset: int, value: int) -> int:
    if offset < 0 or offset >= len(mem) or value < 0 or value > 255:
        return -1

    mem[offset] = value
    return 0


def slice_(mem: List[int], begin: int, end: int) -> List[int]:
    return mem[begin:end]
std/stdstatements.py (new file, 48 lines)
@@ -0,0 +1,48 @@
from itertools import islice
from typing import Callable, Optional, Union

from src.tokentypes.types import Number, ArgumentsError


def if_else(
        condition: Number,
        if_cb: Callable,
        else_cb: Optional[Callable] = None) -> Number:
    if else_cb is None:
        return _if(condition, if_cb)

    return if_cb() if condition else else_cb()


def _if(condition: Number, cb: Callable) -> int:
    return cb() if condition else 0


def while_(condition: Callable, body: Callable) -> int:
    while condition():
        body()

    return 0


def branch(*values: Union[Number, Callable]) -> int:
    """
    A multi-way conditional: takes (condition, callback) pairs, plus an
    optional trailing default callback, and runs the first callback
    whose condition is truthy
    """

    if len(values) < 2 or callable(values[0]):
        raise ArgumentsError("invalid arguments")

    pairs = zip(
        islice(values, None, None, 2),
        islice(values, 1, None, 2)
    )

    for cond, callback in pairs:
        if cond:
            return callback()

    if len(values) % 2:
        return values[-1]()

    return 0
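Note: branch takes (condition, callback) pairs plus an optional trailing
default and runs the first callback whose condition is truthy. Called directly
from Python, mirroring the fizzbuzz example earlier in this commit:

from std.stdstatements import branch

x = 7
result = branch(
    x % 15 == 0, lambda: "fizzbuzz",
    x % 3 == 0, lambda: "fizz",
    x % 5 == 0, lambda: "buzz",
    lambda: str(x),
)
print(result)  # "7"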
tests/__init__.py (new file, 7 lines)
@@ -0,0 +1,7 @@
from unittest import TestSuite

from .testcases import evaluation_tests


full_suite = TestSuite()
full_suite.addTest(evaluation_tests)
tests/__main__.py (new file, 8 lines)
@@ -0,0 +1,8 @@
import unittest

from . import full_suite


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(full_suite)
tests/testcases.py (new file, 236 lines)
@@ -0,0 +1,236 @@
from math import pi
from unittest import TestCase, TestSuite, makeSuite

from std.stdlibrary import stdnamespace
from src.tokentypes.tokens import Function
from src.interpreter.interpret import Interpreter
from src.tokentypes.types import InvalidSyntaxError


interpreter = Interpreter()
evaluate = lambda code: interpreter.interpret(code, stdnamespace)


class TestNumbers(TestCase):
    def test_integer(self):
        self.assertEqual(evaluate("100"), 100)

    def test_float(self):
        self.assertEqual(evaluate("0.1"), 0.1)
        self.assertEqual(evaluate(".1"), 0.1)

    def test_hexadecimal(self):
        self.assertEqual(evaluate("0x175ffa14"), 0x175ffa14)


class TestBasicOperations(TestCase):
    def test_addition(self):
        self.assertEqual(evaluate("1+1"), 2)

    def test_subtraction(self):
        self.assertEqual(evaluate("1-1"), 0)

    def test_multiplication(self):
        self.assertEqual(evaluate("1*1"), 1)

    def test_division(self):
        self.assertEqual(evaluate("1/2"), .5)

    def test_floordivision(self):
        self.assertEqual(evaluate("3//2"), 1)

    def test_modulo(self):
        self.assertEqual(evaluate("7%2"), 1)

    def test_lshift(self):
        self.assertEqual(evaluate("1<<5"), 32)

    def test_rshift(self):
        self.assertEqual(evaluate("128>>5"), 4)

    def test_bitwise_and(self):
        self.assertEqual(evaluate("32 & 64"), 0)

    def test_bitwise_or(self):
        self.assertEqual(evaluate("81 | 82"), 83)

    def test_bitwise_xor(self):
        self.assertEqual(evaluate("54^87"), 97)

    def test_exponentiation(self):
        self.assertEqual(evaluate("2**3"), 8)

    def test_unary_addition(self):
        self.assertEqual(evaluate("+1"), 1)

    def test_unary_subtraction(self):
        self.assertEqual(evaluate("-1"), -1)

    def test_unary_subtraction_multiple(self):
        self.assertEqual(evaluate("--1"), 1)
        self.assertEqual(evaluate("---1"), -1)

    def test_equality(self):
        self.assertEqual(evaluate("2==2"), 1)
        self.assertEqual(evaluate("2!=2"), 0)

    def test_less_than(self):
        self.assertEqual(evaluate("1<2"), 1)
        self.assertEqual(evaluate("2<1"), 0)

    def test_less_equal(self):
        self.assertEqual(evaluate("2<=3"), 1)
        self.assertEqual(evaluate("2<=2"), 1)
        self.assertEqual(evaluate("2<=1"), 0)

    def test_more_than(self):
        self.assertEqual(evaluate("2>1"), 1)
        self.assertEqual(evaluate("1>2"), 0)

    def test_more_equal(self):
        self.assertEqual(evaluate("2>=1"), 1)
        self.assertEqual(evaluate("2>=2"), 1)
        self.assertEqual(evaluate("2>=3"), 0)


class TestOperatorsPriority(TestCase):
    def test_addition_multiplication(self):
        self.assertEqual(evaluate("2+2*2"), 6)

    def test_addition_division(self):
        self.assertEqual(evaluate("2+2/2"), 3)

    def test_addition_exponentiation(self):
        self.assertEqual(evaluate("1+2**3"), 9)

    def test_subtraction_addition(self):
        self.assertEqual(evaluate("1-2+3"), 2)

    def test_subtraction_subtraction(self):
        self.assertEqual(evaluate("1-2-3"), -4)

    def test_subtraction_multiplication(self):
        self.assertEqual(evaluate("2-2*2"), -2)

    def test_subtraction_division(self):
        self.assertEqual(evaluate("2-2/2"), 1)

    def test_subtraction_exponentiation(self):
        self.assertEqual(evaluate("1-2**3"), -7)

    def test_multiplication_exponentiation(self):
        self.assertEqual(evaluate("2*10**2"), 200)

    def test_division_exponentiation(self):
        self.assertEqual(evaluate("1/10**2"), 0.01)

    def test_exponentiation_right_associativity(self):
        self.assertEqual(evaluate("2**3**2"), 512)

    def test_exponentiation_unary_subtraction(self):
        self.assertEqual(evaluate("2**-3"), 0.125)

    def test_unary_subtraction_exponentiation(self):
        self.assertEqual(evaluate("-2**2"), -4)


class TestVariables(TestCase):
    def test_get_pi(self):
        self.assertEqual(evaluate("pi"), pi)

    def test_negate_pi(self):
        self.assertEqual(evaluate("-pi"), -pi)

    def test_expression_with_constant(self):
        self.assertEqual(evaluate("pi+2.0-3"), pi + 2 - 3)
        self.assertEqual(evaluate("2.0+pi-3"), 2 + pi - 3)
        self.assertEqual(evaluate("2.0-3+pi"), 2 - 3 + pi)

    def test_declare_var(self):
        self.assertEqual(evaluate("a=5+5"), 10)

    def test_get_declared_var(self):
        self.assertEqual(evaluate("a=10 \n a"), 10)


class TestFunctions(TestCase):
    def test_funccall(self):
        self.assertEqual(evaluate("rt(25, 2)"), 5)

    def test_nested_funccall(self):
        self.assertEqual(evaluate("rt(rt(625, 2), 2)"), 5)

    def test_expr_in_funccall(self):
        self.assertEqual(evaluate("rt(20+5, 1.0+1.0)"), 5)

    def test_funcdef(self):
        func_a = evaluate("a()=5")
        self.assertIsInstance(func_a, Function)
        self.assertEqual(func_a.name, "a()")

        func_b = evaluate("b(x)=x+1")
        self.assertIsInstance(func_b, Function)
        self.assertEqual(func_b.name, "b(x)")

        func_c = evaluate("c(x,y)=x*y")
        self.assertIsInstance(func_c, Function)
        self.assertEqual(func_c.name, "c(x,y)")

    def test_def_func_call(self):
        self.assertEqual(evaluate("f(x,y)=x*y \n f(2,5)"), 10)

    def test_def_func_argexpr(self):
        self.assertEqual(evaluate("f(x,y)=x*y \n f(2+5, 3*2)"), 42)

    def test_funcdef_argexpr(self):
        with self.assertRaises(InvalidSyntaxError):
            evaluate("f(x+1)=x+2")

        with self.assertRaises(InvalidSyntaxError):
            evaluate("f(1)=2")

    def test_funcdef_missed_brace(self):
        with self.assertRaises(InvalidSyntaxError):
            evaluate("f(x=2")

        with self.assertRaises(InvalidSyntaxError):
            evaluate("fx)=2")

    def test_funcdef_no_body(self):
        with self.assertRaises(InvalidSyntaxError):
            evaluate("f(x)=")


class TestLambdas(TestCase):
    def test_assign_to_var(self):
        self.assertEqual(evaluate("a=(x)=x+1 \n a(1)"), 2)

    def test_lambda_as_argument(self):
        self.assertEqual(evaluate("""
        sum(mem)=reduce((x,y)=x+y, mem)
        range(begin, end) = i=begin-1; map((x)=i=i+1;x+i, malloc(end-begin))
        sum(range(0,5))
        """), 10)

    def test_missing_brace_in_arglambda(self):
        with self.assertRaises(InvalidSyntaxError):
            evaluate("sum(mem)=reduce(x,y)=x+y, mem)")

        with self.assertRaises(InvalidSyntaxError):
            evaluate("sum(mem)=reduce((x,y=x+y, mem)")

    def test_missing_brace_in_vardecl_lambda(self):
        with self.assertRaises(InvalidSyntaxError):
            evaluate("a=(x=x+1")

        with self.assertRaises(InvalidSyntaxError):
            evaluate("a=x)=x+1")


evaluation_tests = TestSuite()
evaluation_tests.addTest(makeSuite(TestNumbers))
evaluation_tests.addTest(makeSuite(TestBasicOperations))
evaluation_tests.addTest(makeSuite(TestOperatorsPriority))
evaluation_tests.addTest(makeSuite(TestVariables))
evaluation_tests.addTest(makeSuite(TestFunctions))
evaluation_tests.addTest(makeSuite(TestLambdas))