diff --git a/src/util/expression/ast.rs b/src/util/expression/ast.rs
new file mode 100644
index 0000000..b8bb240
--- /dev/null
+++ b/src/util/expression/ast.rs
@@ -0,0 +1,31 @@
+//! Abstract syntax tree produced by the expression parser.
+
+/// Binary (two-operand) arithmetic operators.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum BinaryOp {
+    Add,
+    Subtract,
+    Multiply,
+    Divide,
+    Modulus,
+}
+
+/// Unary (single-operand) operators.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum UnaryOp {
+    Negate,
+    Not,
+}
+
+/// A node of the expression tree.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Node {
+    /// Floating-point literal.
+    Float(f64),
+    /// Integer literal.
+    Integer(i64),
+    /// `left op right`.
+    BinaryExpr { left: Box<Node>, op: BinaryOp, right: Box<Node> },
+    /// `op expr`.
+    UnaryExpr { op: UnaryOp, expr: Box<Node> },
+}
diff --git a/src/util/expression/lexer.rs b/src/util/expression/lexer.rs
new file mode 100644
index 0000000..761c3ab
--- /dev/null
+++ b/src/util/expression/lexer.rs
@@ -0,0 +1,312 @@
+//! Lexer for arithmetic expressions: turns source text into located tokens.
+
+use std::{
+    fmt::{Debug, Display, Formatter},
+    iter::Peekable,
+    str::Chars,
+};
+
+use super::token::{ArithmeticOp, SourceLocation, Token};
+
+/// Character cursor over the input that tracks the current source location.
+pub(super) struct Cursor<'a> {
+    input: &'a str,
+    iter: Peekable<Chars<'a>>,
+    /// Character position in the input string
+    pos: usize,
+    /// Location of the next unread character (line starts at 1, column at 0)
+    location: SourceLocation,
+}
+
+impl<'a> Cursor<'a> {
+    pub fn new(input: &'a str) -> Cursor<'a> {
+        Cursor { input, iter: input.chars().peekable(), pos: 0, location: SourceLocation { line: 1, column: 0 } }
+    }
+
+    pub fn pos(&self) -> usize {
+        self.pos
+    }
+
+    /// Updates the bookkeeping for a character that has just been consumed.
+    ///
+    /// The line counter is driven by the consumed character itself, so it only
+    /// advances once the newline has actually been read.
+    fn inc_pos(&mut self, consumed: char) {
+        if consumed == '\n' {
+            self.location.line += 1;
+            self.location.column = 0;
+        } else {
+            self.location.column += 1;
+        }
+        self.pos += 1;
+    }
+
+    /// Consumes and returns the next character, or `None` at end of input.
+    pub fn next(&mut self) -> Option<char> {
+        let c = self.iter.next()?;
+        self.inc_pos(c);
+        Some(c)
+    }
+
+    /// Consumes the next character only when `func` accepts it.
+    pub fn next_if(&mut self, func: impl FnOnce(&char) -> bool) -> Option<char> {
+        let c = self.iter.peek()?;
+        if func(c) {
+            self.next()
+        } else {
+            None
+        }
+    }
+
+    /// Returns the next character without consuming it.
+    pub fn peek(&mut self) -> Option<char> {
+        self.iter.peek().copied()
+    }
+}
+
+/// Converts an input string into tokens paired with their source locations.
+pub(super) struct Tokenizer<'a> {
+    cursor: Cursor<'a>,
+    pub tokens: Vec<(Token, SourceLocation)>,
+}
+
+impl<'a> Tokenizer<'a> {
+    pub fn new(input: &'a str) -> Tokenizer<'a> {
+        Tokenizer { cursor: Cursor::new(input), tokens: Vec::new() }
+    }
+
+    /// Scans the whole input, appending every recognised token to `tokens`.
+    pub fn scan(&mut self) -> LexResult {
+        while let Some(c) = self.cursor.peek() {
+            self.scan_item(c)?;
+        }
+
+        Ok(())
+    }
+
+    fn next(&mut self) -> LexResult<char> {
+        self.cursor.next().ok_or(LexError::UnexpectedEndOfInput)
+    }
+
+    /// Dispatches on the upcoming (not yet consumed) character `c`.
+    fn scan_item(&mut self, c: char) -> LexResult {
+        match c {
+            // `\r` is skipped as well so that Windows line endings are accepted.
+            ' ' | '\t' | '\r' | '\n' => {
+                self.cursor.next();
+                Ok(())
+            }
+            '0'..='9' => self.number(),
+            '+' | '-' | '*' | '/' | '%' => self.operator(),
+            '(' | ')' => self.bracket(),
+            _ => self.unexpected(c),
+        }
+    }
+
+    /// Scans an integer or decimal literal starting at the cursor.
+    fn number(&mut self) -> LexResult {
+        let mut value = String::new();
+        let mut is_fractional = false;
+        let loc = self.cursor.location;
+
+        // Digits are always accepted; a decimal point only once.
+        while let Some(c) = self.cursor.next_if(|c| c.is_ascii_digit() || (*c == '.' && !is_fractional)) {
+            is_fractional |= c == '.';
+            value.push(c);
+        }
+
+        // The loop only stops in front of a '.' when one was already seen. Reject it
+        // before consuming it so the error location points at the character itself.
+        if self.cursor.peek() == Some('.') {
+            return self.unexpected('.');
+        }
+
+        self.tokens.push((Token::Number { value, is_fractional }, loc));
+
+        Ok(())
+    }
+
+    fn operator(&mut self) -> LexResult {
+        let loc = self.cursor.location;
+        let op = match self.next()? {
+            '+' => ArithmeticOp::Add,
+            '-' => ArithmeticOp::Subtract,
+            '*' => ArithmeticOp::Multiply,
+            '/' => ArithmeticOp::Divide,
+            '%' => ArithmeticOp::Modulus,
+            c => return Err(LexError::UnexpectedChar { c, loc }),
+        };
+        self.tokens.push((Token::ArithmeticOp(op), loc));
+        Ok(())
+    }
+
+    fn bracket(&mut self) -> LexResult {
+        let loc = self.cursor.location;
+        let token = match self.next()? {
+            '(' => Token::LeftParen,
+            ')' => Token::RightParen,
+            c => return Err(LexError::UnexpectedChar { c, loc }),
+        };
+        self.tokens.push((token, loc));
+        Ok(())
+    }
+
+    /// Builds the error for an unexpected upcoming character `c`.
+    fn unexpected(&self, c: char) -> LexResult {
+        Err(LexError::UnexpectedChar { c, loc: self.cursor.location })
+    }
+}
+
+pub type LexResult<T = ()> = Result<T, LexError>;
+
+pub enum LexError {
+    UnexpectedChar { c: char, loc: SourceLocation },
+    UnexpectedEndOfInput,
+}
+
+impl Display for LexError {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            LexError::UnexpectedChar { c, loc } => {
+                write!(f, "Unexpected character '{}' at {}", c, loc)
+            }
+            LexError::UnexpectedEndOfInput => write!(f, "Unexpected end of input"),
+        }
+    }
+}
+
+// `Debug` mirrors `Display` so that `unwrap()` failures print the readable message.
+impl Debug for LexError {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        Display::fmt(self, f)
+    }
+}
+
+impl std::error::Error for LexError {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn test_expression(input: &str, result: Vec<Token>) {
+        let mut tok = Tokenizer::new(input);
+        tok.scan().unwrap();
+        let tokens = tok.tokens.into_iter().map(|(t, _)| t).collect::<Vec<_>>();
+        assert_eq!(tokens, result);
+    }
+
+    #[test]
+    fn test_number() {
+        test_expression("1", vec![Token::Number { value: "1".to_string(), is_fractional: false }]);
+        test_expression("1.0", vec![Token::Number { value: "1.0".to_string(), is_fractional: true }]);
+    }
+
+    #[test]
+    fn test_arithmetic_ops() {
+        test_expression(
+            "1 + 22 * 33 / 4444 + 55555",
+            vec![
+                Token::Number { value: "1".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Add),
+                Token::Number { value: "22".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Multiply),
+                Token::Number { value: "33".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Divide),
+                Token::Number { value: "4444".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Add),
+                Token::Number { value: "55555".to_string(), is_fractional: false },
+            ],
+        );
+
+        test_expression(
+            "1.0 + 2.0",
+            vec![
+                Token::Number { value: "1.0".to_string(), is_fractional: true },
+                Token::ArithmeticOp(ArithmeticOp::Add),
+                Token::Number { value: "2.0".to_string(), is_fractional: true },
+            ],
+        );
+        test_expression(
+            "2.0 - 3",
+            vec![
+                Token::Number { value: "2.0".to_string(), is_fractional: true },
+                Token::ArithmeticOp(ArithmeticOp::Subtract),
+                Token::Number { value: "3".to_string(), is_fractional: false },
+            ],
+        );
+    }
+
+    #[test]
+    fn test_brackets() {
+        test_expression(
+            "(1 + 2)",
+            vec![
+                Token::LeftParen,
+                Token::Number { value: "1".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Add),
+                Token::Number { value: "2".to_string(), is_fractional: false },
+                Token::RightParen,
+            ],
+        );
+        test_expression(
+            "2 * (3 + 4)",
+            vec![
+                Token::Number { value: "2".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Multiply),
+                Token::LeftParen,
+                Token::Number { value: "3".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Add),
+                Token::Number { value: "4".to_string(), is_fractional: false },
+                Token::RightParen,
+            ],
+        );
+        test_expression(
+            "1 + ((2 * 3) + 4)",
+            vec![
+                Token::Number { value: "1".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Add),
+                Token::LeftParen,
+                Token::LeftParen,
+                Token::Number { value: "2".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Multiply),
+                Token::Number { value: "3".to_string(), is_fractional: false },
+                Token::RightParen,
+                Token::ArithmeticOp(ArithmeticOp::Add),
+                Token::Number { value: "4".to_string(), is_fractional: false },
+                Token::RightParen,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_crlf_whitespace() {
+        test_expression(
+            "1 +\r\n2",
+            vec![
+                Token::Number { value: "1".to_string(), is_fractional: false },
+                Token::ArithmeticOp(ArithmeticOp::Add),
+                Token::Number { value: "2".to_string(), is_fractional: false },
+            ],
+        );
+    }
+
+    #[test]
+    fn test_locations() {
+        let mut tok = Tokenizer::new("1\n+ 2");
+        tok.scan().unwrap();
+        let locs = tok.tokens.iter().map(|(_, loc)| (loc.line, loc.column)).collect::<Vec<_>>();
+        assert_eq!(locs, vec![(1, 0), (2, 0), (2, 2)]);
+    }
+
+    #[test]
+    fn test_second_decimal_point() {
+        let mut tok = Tokenizer::new("1.2.3");
+        match tok.scan() {
+            Err(LexError::UnexpectedChar { c, loc }) => {
+                assert_eq!(c, '.');
+                assert_eq!(loc, SourceLocation { line: 1, column: 3 });
+            }
+            other => panic!("expected an unexpected-character error, got {:?}", other),
+        }
+    }
+}
diff --git a/src/util/expression/mod.rs b/src/util/expression/mod.rs
new file mode 100644
index 0000000..bf2ceae
--- /dev/null
+++ b/src/util/expression/mod.rs
@@ -0,0 +1,7 @@
+//! Arithmetic expression support: lexer, parser, AST and VM instruction set.
+
+pub mod ast;
+pub mod lexer;
+pub mod parser;
+pub mod token;
+pub mod vm;
diff --git a/src/util/expression/parser.rs b/src/util/expression/parser.rs
new file mode 100644
index 0000000..69626ef
--- /dev/null
+++ b/src/util/expression/parser.rs
@@ -0,0 +1,235 @@
+//! Recursive-descent parser that turns lexer tokens into AST nodes.
+
+use std::fmt::{Debug, Display};
+
+use super::{
+    ast::{BinaryOp, Node, UnaryOp},
+    token::{ArithmeticOp, SourceLocation, Token},
+};
+
+/// Parser over the token list produced by the lexer.
+pub struct Parser {
+    ast: Vec<Node>,
+    tokens: Vec<(Token, SourceLocation)>,
+    pos: usize,
+}
+
+// -- the grammar --
+// expression -> term
+// term -> factor ( ('+' | '-') factor )*
+// factor -> unary ( ('*' | '/' | '%') unary )*
+// unary -> '-' unary | primary
+// primary -> NUMBER | '(' expression ')'
+//
+// The '!' form of `unary` (`UnaryOp::Not`) is not parsed yet: the lexer does not
+// emit a token for it.
+
+impl Parser {
+    pub fn new(tokens: Vec<(Token, SourceLocation)>) -> Parser {
+        Parser { tokens, pos: 0, ast: Vec::new() }
+    }
+
+    /// Expressions collected by the last successful call to [`Parser::parse`].
+    pub fn ast(&self) -> &[Node] {
+        &self.ast
+    }
+
+    /// Token at the cursor, or `UnexpectedEnd` when the input is exhausted.
+    fn current(&self) -> ParseResult<&(Token, SourceLocation)> {
+        self.tokens.get(self.pos).ok_or(ParseError::UnexpectedEnd)
+    }
+
+    fn advance(&mut self) {
+        self.pos += 1;
+    }
+
+    fn is_end(&self) -> bool {
+        self.pos >= self.tokens.len()
+    }
+
+    /// Returns `true` when the token at the cursor equals `token`.
+    fn expect(&self, token: Token) -> bool {
+        matches!(self.tokens.get(self.pos), Some((current, _)) if *current == token)
+    }
+
+    fn parse_expr(&mut self) -> ParseResult<Node> {
+        self.parse_term()
+    }
+
+    /// term -> factor ( ('+' | '-') factor )*
+    fn parse_term(&mut self) -> ParseResult<Node> {
+        let mut node = self.parse_factor()?;
+
+        while let Some((Token::ArithmeticOp(op), _)) = self.tokens.get(self.pos) {
+            let op = match op {
+                ArithmeticOp::Add => BinaryOp::Add,
+                ArithmeticOp::Subtract => BinaryOp::Subtract,
+                ArithmeticOp::Multiply | ArithmeticOp::Divide | ArithmeticOp::Modulus => break,
+            };
+
+            self.advance();
+            let right = self.parse_factor()?;
+            node = Node::BinaryExpr { left: Box::new(node), op, right: Box::new(right) };
+        }
+
+        Ok(node)
+    }
+
+    /// factor -> unary ( ('*' | '/' | '%') unary )*
+    fn parse_factor(&mut self) -> ParseResult<Node> {
+        let mut node = self.parse_unary()?;
+
+        while let Some((Token::ArithmeticOp(op), _)) = self.tokens.get(self.pos) {
+            let op = match op {
+                ArithmeticOp::Multiply => BinaryOp::Multiply,
+                ArithmeticOp::Divide => BinaryOp::Divide,
+                ArithmeticOp::Modulus => BinaryOp::Modulus,
+                ArithmeticOp::Add | ArithmeticOp::Subtract => break,
+            };
+
+            self.advance();
+            let right = self.parse_unary()?;
+            node = Node::BinaryExpr { left: Box::new(node), op, right: Box::new(right) };
+        }
+
+        Ok(node)
+    }
+
+    /// unary -> '-' unary | primary
+    fn parse_unary(&mut self) -> ParseResult<Node> {
+        if self.expect(Token::ArithmeticOp(ArithmeticOp::Subtract)) {
+            self.advance();
+            let expr = self.parse_unary()?;
+            Ok(Node::UnaryExpr { op: UnaryOp::Negate, expr: Box::new(expr) })
+        } else {
+            self.parse_primary()
+        }
+    }
+
+    /// primary -> NUMBER | '(' expression ')'
+    fn parse_primary(&mut self) -> ParseResult<Node> {
+        if self.expect(Token::LeftParen) {
+            self.advance();
+            let node = self.parse_expr()?;
+            if !self.expect(Token::RightParen) {
+                return self.unexpected();
+            }
+            self.advance();
+            return Ok(node);
+        }
+
+        let (token, loc) = self.current()?;
+        let node = match token {
+            Token::Number { value, is_fractional } => {
+                let parsed = if *is_fractional {
+                    value.parse::<f64>().ok().map(Node::Float)
+                } else {
+                    value.parse::<i64>().ok().map(Node::Integer)
+                };
+                // Literals come from user input (e.g. an integer that overflows i64),
+                // so a failed conversion is reported instead of panicking.
+                match parsed {
+                    Some(node) => node,
+                    None => return Err(ParseError::InvalidNumber { value: value.clone(), loc: *loc }),
+                }
+            }
+            _ => return Err(ParseError::UnexpectedToken { token: token.clone(), loc: *loc }),
+        };
+
+        self.advance();
+        Ok(node)
+    }
+
+    fn unexpected<T>(&self) -> ParseResult<T> {
+        let (token, loc) = self.current()?;
+        Err(ParseError::UnexpectedToken { token: token.clone(), loc: *loc })
+    }
+
+    /// Parses every remaining token into a list of expressions, stored in `self.ast`.
+    pub fn parse(&mut self) -> ParseResult {
+        let mut ast = Vec::new();
+        while !self.is_end() {
+            ast.push(self.parse_expr()?);
+        }
+
+        self.ast = ast;
+
+        Ok(())
+    }
+}
+
+type ParseResult<T = ()> = Result<T, ParseError>;
+
+pub enum ParseError {
+    UnexpectedToken { token: Token, loc: SourceLocation },
+    /// A number literal that cannot be represented (e.g. an integer outside `i64`).
+    InvalidNumber { value: String, loc: SourceLocation },
+    UnexpectedEnd,
+}
+
+impl Display for ParseError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ParseError::UnexpectedToken { token, loc } => {
+                write!(f, "Unexpected token {:?} at {}", token, loc)
+            }
+            ParseError::InvalidNumber { value, loc } => {
+                write!(f, "Invalid number '{}' at {}", value, loc)
+            }
+            ParseError::UnexpectedEnd => write!(f, "Unexpected end of input"),
+        }
+    }
+}
+
+// `Debug` mirrors `Display` so that `unwrap()` failures print the readable message.
+impl Debug for ParseError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        Display::fmt(self, f)
+    }
+}
+
+impl std::error::Error for ParseError {}
+
+#[cfg(test)]
+mod tests {
+    use crate::util::expression::{ast::BinaryOp, lexer::Tokenizer};
+
+    use super::*;
+
+    fn parse(input: &str) -> Vec<Node> {
+        let mut tokenizer = Tokenizer::new(input);
+        tokenizer.scan().unwrap();
+
+        let mut parser = Parser::new(tokenizer.tokens);
+        parser.parse().unwrap();
+
+        parser.ast
+    }
+
+    #[test]
+    fn test_parser() {
+        parse("-4 + (2 * 3) % 5 + (2 + 2) * 2");
+
+        assert_eq!(
+            parse("1 + 2 * 3"),
+            vec![Node::BinaryExpr {
+                left: Box::new(Node::Integer(1)),
+                op: BinaryOp::Add,
+                right: Box::new(Node::BinaryExpr {
+                    left: Box::new(Node::Integer(2)),
+                    op: BinaryOp::Multiply,
+                    right: Box::new(Node::Integer(3)),
+                }),
+            }]
+        );
+    }
+
+    #[test]
+    fn test_integer_overflow_is_an_error() {
+        let mut tokenizer = Tokenizer::new("99999999999999999999");
+        tokenizer.scan().unwrap();
+
+        let mut parser = Parser::new(tokenizer.tokens);
+        assert!(matches!(parser.parse(), Err(ParseError::InvalidNumber { .. })));
+    }
+}
diff --git a/src/util/expression/token.rs b/src/util/expression/token.rs
new file mode 100644
index 0000000..3fc84c6
--- /dev/null
+++ b/src/util/expression/token.rs
@@ -0,0 +1,65 @@
+//! Token types produced by the expression lexer.
+
+use std::fmt::Display;
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum ArithmeticOp {
+    Add,
+    Subtract,
+    Multiply,
+    Divide,
+    Modulus,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum LogicalOp {
+    And,
+    Or,
+    Not,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum ComparisonOp {
+    Equal,
+    NotEqual,
+    LessThan,
+    GreaterThan,
+    LessThanOrEqual,
+    GreaterThanOrEqual,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum BitwiseOp {
+    And,
+    Or,
+    Xor,
+    Not,
+    ShiftLeft,
+    ShiftRight,
+}
+
+/// A lexical token of the expression language.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum Token {
+    /// Numeric literal kept as source text; `is_fractional` marks a decimal point.
+    Number { value: String, is_fractional: bool },
+    ArithmeticOp(ArithmeticOp),
+    LogicalOp(LogicalOp),
+    ComparisonOp(ComparisonOp),
+    BitwiseOp(BitwiseOp),
+    LeftParen,
+    RightParen,
+}
+
+/// Position of a token or character in the source text.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct SourceLocation {
+    pub line: usize,
+    pub column: usize,
+}
+
+impl Display for SourceLocation {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "line {}, column {}", self.line, self.column)
+    }
+}
diff --git a/src/util/expression/vm.rs b/src/util/expression/vm.rs
new file mode 100644
index 0000000..0af949e
--- /dev/null
+++ b/src/util/expression/vm.rs
@@ -0,0 +1,71 @@
+//! Instruction set for the expression virtual machine.
+
+/// A single virtual-machine instruction.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum VMOpcode {
+    /// No operation
+    NOP,
+    /// Pop the top value off the stack
+    STPOP,
+    /// Loads a specified variable onto the stack
+    STLD(u16),
+    /// Pushes a number onto the stack
+    STPSH(f64),
+    /// Adds the top (int, int) from the stack
+    ADDII,
+    /// Adds the top (float, float) from the stack
+    ADDFF,
+    /// Subtracts the top (int, int) from the stack
+    SUBII,
+    /// Subtracts the top (float, float) from the stack
+    SUBFF,
+    /// Multiplies the top (int, int) from the stack
+    MULII,
+    /// Multiplies the top (float, float) from the stack
+    MULFF,
+    /// Divides the top (int, int) from the stack
+    DIVII,
+    /// Divides the top (float, float) from the stack
+    DIVFF,
+    /// Modulus of the top (int, int) from the stack
+    MODII,
+    /// Modulus of the top (float, float) from the stack
+    MODFF,
+    /// Calls a function with specified index and passes 1 argument from the stack
+    CALL1(u16),
+    /// Calls a function with specified index and passes 2 arguments from the stack
+    CALL2(u16),
+    /// Calls a function with specified index and passes 3 arguments from the stack
+    CALL3(u16),
+
+    // Intrinsics
+    /// int(f64) -> int
+    INFLOAT2INT,
+    /// f64(int) -> f64
+    ININT2FLOAT,
+    /// frandom() -> f64
+    INFRAND,
+    /// irandom() -> int
+    INIRAND,
+    /// floor(f64) -> f64
+    INFLOOR,
+    /// ceil(f64) -> f64
+    INCEIL,
+    /// round(f64) -> f64
+    INROUND,
+    /// abs(f64) -> f64
+    INABS,
+    /// min(f64, f64) -> f64
+    INMIN,
+    /// max(f64, f64) -> f64
+    INMAX,
+    /// sqrt(f64) -> f64
+    INSQRT,
+    /// cos(f64) -> f64
+    INCOS,
+    /// sin(f64) -> f64
+    INSIN,
+}
+
+/// Placeholder for the expression virtual machine; it has no state or behaviour yet.
+pub struct VM {}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index 8cf1c5e..740717a 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1,4 +1,5 @@
 pub mod atom;
 pub mod bitvec;
 pub mod browser;
+pub mod expression;
 pub mod rng;