1
0
Fork 0
mirror of https://github.com/doukutsu-rs/doukutsu-rs synced 2024-11-21 21:22:44 +00:00

wip: expression parser

This commit is contained in:
Alula 2024-08-27 15:48:56 +02:00
parent 067376c74f
commit d41fa57ca2
No known key found for this signature in database
GPG key ID: 3E00485503A1D8BA
7 changed files with 628 additions and 0 deletions

View file

@ -0,0 +1,22 @@
/// Binary operators that can appear in a [`Node::BinaryExpr`].
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum BinaryOp {
/// `+`
Add,
/// `-`
Subtract,
/// `*`
Multiply,
/// `/`
Divide,
/// `%`
Modulus,
}
/// Prefix operators that can appear in a [`Node::UnaryExpr`].
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum UnaryOp {
/// Arithmetic negation (prefix `-`).
Negate,
/// Logical/bitwise "not". NOTE(review): nothing visible in this change produces
/// this variant yet -- exact semantics to be confirmed.
Not,
}
/// A node of the expression syntax tree.
#[derive(Debug, Clone, PartialEq)]
pub enum Node {
/// Floating-point literal.
Float(f64),
/// Integer literal.
Integer(i64),
/// Application of a binary operator to two boxed sub-expressions.
BinaryExpr { left: Box<Node>, op: BinaryOp, right: Box<Node> },
/// Application of a prefix operator to one boxed sub-expression.
UnaryExpr { op: UnaryOp, expr: Box<Node> },
}

View file

@ -0,0 +1,266 @@
use std::{
fmt::{Debug, Display, Formatter},
iter::Peekable,
str::Chars,
};
use super::token::{ArithmeticOp, SourceLocation, Token};
/// Character-level reader over the input string that tracks how many
/// characters were consumed and the current line/column.
pub(super) struct Cursor<'a> {
// The full input text; kept alongside the iterator but not read by the
// methods visible in this file.
input: &'a str,
// Peekable character iterator, so callers can look ahead one character.
iter: Peekable<Chars<'a>>,
/// Character position in the input string
pos: usize,
// Line/column pair updated every time a character is consumed.
location: SourceLocation,
}
impl<'a> Cursor<'a> {
    /// Creates a cursor positioned before the first character of `input`.
    ///
    /// The initial location is line 1, column 0.
    pub fn new(input: &'a str) -> Self {
        Cursor { input, iter: input.chars().peekable(), pos: 0, location: SourceLocation { line: 1, column: 0 } }
    }

    /// Returns the number of characters consumed so far.
    pub fn pos(&self) -> usize {
        self.pos
    }

    /// Updates the bookkeeping for a character that has just been consumed.
    ///
    /// The decision is based on the consumed character itself: looking at the
    /// *upcoming* character instead would move to the next line one character
    /// too early and leave the first column of every following line off by one.
    fn inc_pos(&mut self, consumed: char) {
        if consumed == '\n' {
            self.location.line += 1;
            self.location.column = 0;
        } else {
            self.location.column += 1;
        }
        self.pos += 1;
    }

    /// Consumes and returns the next character, or `None` at end of input.
    pub fn next(&mut self) -> Option<char> {
        let c = self.iter.next()?;
        self.inc_pos(c);
        Some(c)
    }

    /// Consumes and returns the next character only if `func` accepts it.
    ///
    /// Returns `None` (consuming nothing) at end of input or when `func`
    /// returns `false`.
    pub fn next_if(&mut self, func: impl FnOnce(&char) -> bool) -> Option<char> {
        let accepted = func(self.iter.peek()?);
        if accepted {
            self.next()
        } else {
            None
        }
    }

    /// Returns the next character without consuming it.
    ///
    /// Takes `&mut self` because `Peekable::peek` may need to advance the
    /// underlying iterator to fill its look-ahead slot.
    pub fn peek(&mut self) -> Option<char> {
        self.iter.peek().copied()
    }
}
/// Turns an input string into a flat list of tokens.
pub(super) struct Tokenizer<'a> {
// Character reader that also supplies the location attached to each token.
cursor: Cursor<'a>,
/// Tokens produced so far, each paired with the location recorded just
/// before its first character was consumed.
pub tokens: Vec<(Token, SourceLocation)>,
}
impl Tokenizer<'_> {
pub fn new(input: &str) -> Tokenizer {
Tokenizer { cursor: Cursor::new(input), tokens: Vec::new() }
}
pub fn scan(&mut self) -> LexResult {
while let Some(c) = self.cursor.peek() {
self.scan_item(c)?;
}
Ok(())
}
fn next(&mut self) -> LexResult<char> {
self.cursor.next().ok_or_else(|| LexError::UnexpectedEndOfInput)
}
fn next_if(&mut self, func: impl FnOnce(&char) -> bool) -> LexResult<char> {
self.cursor.next_if(func).ok_or_else(|| LexError::UnexpectedEndOfInput)
}
fn scan_item(&mut self, c: char) -> LexResult {
match c {
' ' | '\t' | '\n' => {
self.cursor.next();
return Ok(());
}
'0'..='9' => self.number(),
'+' | '-' | '*' | '/' | '%' => self.operator(),
'(' | ')' => self.bracket(),
_ => return self.unexpected(c),
}
}
fn number(&mut self) -> LexResult {
let mut value = String::new();
let mut is_fractional = false;
let loc = self.cursor.location;
while let Ok(c) = self.next_if(|c| matches!(c, '0'..='9' | '.')) {
if c == '.' {
if is_fractional {
return self.unexpected(c);
}
is_fractional = true;
}
value.push(c);
}
self.tokens.push((Token::Number { value, is_fractional }, loc));
Ok(())
}
fn operator(&mut self) -> LexResult {
let loc = self.cursor.location;
let c = self.next()?;
match c {
'+' => self.tokens.push((Token::ArithmeticOp(ArithmeticOp::Add), loc)),
'-' => self.tokens.push((Token::ArithmeticOp(ArithmeticOp::Subtract), loc)),
'*' => self.tokens.push((Token::ArithmeticOp(ArithmeticOp::Multiply), loc)),
'/' => self.tokens.push((Token::ArithmeticOp(ArithmeticOp::Divide), loc)),
'%' => self.tokens.push((Token::ArithmeticOp(ArithmeticOp::Modulus), loc)),
_ => return self.unexpected(c),
}
Ok(())
}
fn bracket(&mut self) -> LexResult {
let loc = self.cursor.location;
let c = self.next()?;
match c {
'(' => self.tokens.push((Token::LeftParen, loc)),
')' => self.tokens.push((Token::RightParen, loc)),
_ => return self.unexpected(c),
}
Ok(())
}
fn unexpected(&self, c: char) -> LexResult {
let loc = self.cursor.location;
Err(LexError::UnexpectedChar { c, loc })
}
}
/// Result type used throughout the lexer; the success type defaults to `()`.
pub type LexResult<T = ()> = Result<T, LexError>;
/// Errors reported while tokenizing.
pub enum LexError {
/// A character `c` that the tokenizer cannot accept, with the location it was reported at.
UnexpectedChar { c: char, loc: SourceLocation },
/// The input ended while a character was still required.
UnexpectedEndOfInput,
}
impl Display for LexError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
LexError::UnexpectedChar { c, loc } => {
write!(f, "Unexpected character '{}' at {}", c, loc)
}
LexError::UnexpectedEndOfInput => write!(f, "Unexpected end of input"),
}
}
}
/// `Debug` output is deliberately the same text as `Display`, so that e.g.
/// `unwrap()` failures show the readable message.
impl Debug for LexError {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        <Self as Display>::fmt(self, f)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the token expected for an integer literal.
    fn int(text: &str) -> Token {
        Token::Number { value: text.to_string(), is_fractional: false }
    }

    /// Builds the token expected for a literal containing a decimal point.
    fn frac(text: &str) -> Token {
        Token::Number { value: text.to_string(), is_fractional: true }
    }

    /// Builds the token expected for an arithmetic operator.
    fn op(kind: ArithmeticOp) -> Token {
        Token::ArithmeticOp(kind)
    }

    /// Tokenizes `input` and compares the tokens (locations dropped) with `result`.
    fn test_expression(input: &str, result: Vec<Token>) {
        let mut tok = Tokenizer::new(input);
        tok.scan().unwrap();

        let tokens: Vec<Token> = tok.tokens.into_iter().map(|(token, _)| token).collect();
        assert_eq!(tokens, result);
    }

    #[test]
    fn test_number() {
        test_expression("1", vec![int("1")]);
        test_expression("1.0", vec![frac("1.0")]);
    }

    #[test]
    fn test_arithmetic_ops() {
        test_expression(
            "1 + 22 * 33 / 4444 + 55555",
            vec![
                int("1"),
                op(ArithmeticOp::Add),
                int("22"),
                op(ArithmeticOp::Multiply),
                int("33"),
                op(ArithmeticOp::Divide),
                int("4444"),
                op(ArithmeticOp::Add),
                int("55555"),
            ],
        );

        test_expression("1.0 + 2.0", vec![frac("1.0"), op(ArithmeticOp::Add), frac("2.0")]);

        test_expression("2.0 - 3", vec![frac("2.0"), op(ArithmeticOp::Subtract), int("3")]);
    }

    #[test]
    fn test_brackets() {
        test_expression(
            "(1 + 2)",
            vec![Token::LeftParen, int("1"), op(ArithmeticOp::Add), int("2"), Token::RightParen],
        );

        test_expression(
            "2 * (3 + 4)",
            vec![
                int("2"),
                op(ArithmeticOp::Multiply),
                Token::LeftParen,
                int("3"),
                op(ArithmeticOp::Add),
                int("4"),
                Token::RightParen,
            ],
        );

        test_expression(
            "1 + ((2 * 3) + 4)",
            vec![
                int("1"),
                op(ArithmeticOp::Add),
                Token::LeftParen,
                Token::LeftParen,
                int("2"),
                op(ArithmeticOp::Multiply),
                int("3"),
                Token::RightParen,
                op(ArithmeticOp::Add),
                int("4"),
                Token::RightParen,
            ],
        );
    }
}

View file

@ -0,0 +1,5 @@
// Expression parsing support (work in progress).
// Syntax tree types (`Node`, `BinaryOp`, `UnaryOp`).
pub mod ast;
// Tokenizer that turns source text into tokens.
pub mod lexer;
// Recursive-descent parser that turns tokens into syntax tree nodes.
pub mod parser;
// Token and source-location types shared by the lexer and the parser.
pub mod token;
// Opcode definitions and the (currently empty) `VM` type.
pub mod vm;

View file

@ -0,0 +1,206 @@
use std::fmt::{Debug, Display};
use super::{
ast::{BinaryOp, Node, UnaryOp},
token::{ArithmeticOp, SourceLocation, Token},
};
/// Recursive-descent parser over a token list produced by the lexer.
pub struct Parser {
// Expressions produced by the last successful `parse` call.
ast: Vec<Node>,
// Input tokens paired with the location each one was scanned at.
tokens: Vec<(Token, SourceLocation)>,
// Index into `tokens` of the token currently being examined.
pos: usize,
}
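// -- the grammar --
// expression -> term
// term       -> factor ( ('+' | '-') factor )*
// factor     -> unary ( ('*' | '/' | '%') unary )*
// unary      -> ('!' | '-') unary | primary
// primary    -> NUMBER | '(' expression ')'
//
// NOTE: this is the intended grammar. Only the '-' form of `unary` is
// implemented so far: the tokenizer does not produce a token for '!' and
// `parse_unary` only recognises `ArithmeticOp::Subtract`.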
impl Parser {
    /// Creates a parser over `tokens`, positioned at the first token.
    pub fn new(tokens: Vec<(Token, SourceLocation)>) -> Parser {
        Parser { tokens, pos: 0, ast: Vec::new() }
    }

    /// Returns the token at the current position, or `UnexpectedEnd` when all
    /// tokens have been consumed.
    fn current(&self) -> ParseResult<&(Token, SourceLocation)> {
        self.tokens.get(self.pos).ok_or(ParseError::UnexpectedEnd)
    }

    /// Moves on to the next token.
    fn advance(&mut self) {
        self.pos += 1;
    }

    /// Returns `true` once every token has been consumed.
    fn is_end(&self) -> bool {
        self.pos >= self.tokens.len()
    }

    /// Returns `true` if the current token equals `token`; `false` at end of
    /// input. Does not consume anything.
    fn expect(&self, token: Token) -> bool {
        matches!(self.tokens.get(self.pos), Some((t, _)) if *t == token)
    }

    /// expression -> term
    fn parse_expr(&mut self) -> ParseResult<Node> {
        self.parse_term()
    }

    /// term -> factor ( ('+' | '-') factor )*
    ///
    /// Builds a left-associative chain of `BinaryExpr` nodes.
    fn parse_term(&mut self) -> ParseResult<Node> {
        let mut node = self.parse_factor()?;

        while let Some((Token::ArithmeticOp(op), _)) = self.tokens.get(self.pos) {
            let op = match op {
                ArithmeticOp::Add => BinaryOp::Add,
                ArithmeticOp::Subtract => BinaryOp::Subtract,
                // Higher-precedence operators are handled by `parse_factor`.
                ArithmeticOp::Multiply | ArithmeticOp::Divide | ArithmeticOp::Modulus => break,
            };
            self.advance();
            let right = self.parse_factor()?;
            node = Node::BinaryExpr { left: Box::new(node), op, right: Box::new(right) };
        }

        Ok(node)
    }

    /// factor -> unary ( ('*' | '/' | '%') unary )*
    ///
    /// Builds a left-associative chain of `BinaryExpr` nodes.
    fn parse_factor(&mut self) -> ParseResult<Node> {
        let mut node = self.parse_unary()?;

        while let Some((Token::ArithmeticOp(op), _)) = self.tokens.get(self.pos) {
            let op = match op {
                ArithmeticOp::Multiply => BinaryOp::Multiply,
                ArithmeticOp::Divide => BinaryOp::Divide,
                ArithmeticOp::Modulus => BinaryOp::Modulus,
                // Lower-precedence operators are handled by `parse_term`.
                ArithmeticOp::Add | ArithmeticOp::Subtract => break,
            };
            self.advance();
            let right = self.parse_unary()?;
            node = Node::BinaryExpr { left: Box::new(node), op, right: Box::new(right) };
        }

        Ok(node)
    }

    /// unary -> '-' unary | primary
    fn parse_unary(&mut self) -> ParseResult<Node> {
        if matches!(self.current()?, (Token::ArithmeticOp(ArithmeticOp::Subtract), _)) {
            self.advance();
            let expr = self.parse_unary()?;
            Ok(Node::UnaryExpr { op: UnaryOp::Negate, expr: Box::new(expr) })
        } else {
            self.parse_primary()
        }
    }

    /// primary -> NUMBER | '(' expression ')'
    fn parse_primary(&mut self) -> ParseResult<Node> {
        if self.expect(Token::LeftParen) {
            self.advance();
            let node = self.parse_expr()?;
            if !self.expect(Token::RightParen) {
                return self.unexpected();
            }
            self.advance();
            return Ok(node);
        }

        let node = match self.current()? {
            (Token::Number { value, is_fractional }, loc) => Self::parse_number(value, *is_fractional, *loc)?,
            (token, loc) => return Err(ParseError::UnexpectedToken { token: token.clone(), loc: *loc }),
        };
        self.advance();
        Ok(node)
    }

    /// Converts the text of a number token into a literal node.
    ///
    /// The text comes from user input, so a literal that does not fit the
    /// target type (for example an integer beyond the `i64` range) is reported
    /// as an `UnexpectedToken` error at `loc` instead of panicking.
    fn parse_number(value: &str, is_fractional: bool, loc: SourceLocation) -> ParseResult<Node> {
        let node = if is_fractional {
            value.parse::<f64>().ok().map(Node::Float)
        } else {
            value.parse::<i64>().ok().map(Node::Integer)
        };

        node.ok_or_else(|| ParseError::UnexpectedToken {
            token: Token::Number { value: value.to_owned(), is_fractional },
            loc,
        })
    }

    /// Builds the error for the current token: `UnexpectedToken`, or
    /// `UnexpectedEnd` when there is no current token.
    fn unexpected<T>(&self) -> ParseResult<T> {
        let (token, loc) = self.current()?;
        Err(ParseError::UnexpectedToken { token: token.clone(), loc: *loc })
    }

    /// Parses all tokens into a list of expressions stored in `self.ast`.
    ///
    /// Expressions are parsed back to back until the tokens run out.
    ///
    /// # Errors
    ///
    /// Returns the first [`ParseError`] encountered; `self.ast` is left
    /// unchanged in that case.
    pub fn parse(&mut self) -> ParseResult {
        let mut ast = Vec::new();
        while !self.is_end() {
            ast.push(self.parse_expr()?);
        }
        self.ast = ast;
        Ok(())
    }
}
// Result type used throughout the parser; the success type defaults to `()`.
type ParseResult<T = ()> = Result<T, ParseError>;
/// Errors reported while parsing a token list.
pub enum ParseError {
/// A token that is not valid at this point, with the location it was scanned at.
UnexpectedToken { token: Token, loc: SourceLocation },
/// The tokens ran out while more input was still required.
UnexpectedEnd,
}
impl Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ParseError::UnexpectedToken { token, loc } => {
write!(f, "Unexpected token {:?} at {}", token, loc)
}
ParseError::UnexpectedEnd => write!(f, "Unexpected end of input"),
}
}
}
/// `Debug` output is deliberately the same text as `Display`, so that e.g.
/// `unwrap()` failures show the readable message.
impl Debug for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        <Self as Display>::fmt(self, f)
    }
}
#[cfg(test)]
mod tests {
use crate::util::expression::{ast::BinaryOp, lexer::Tokenizer};
use super::*;
// Tokenizes and parses `input`, panicking on any lexer or parser error, and
// returns the resulting expression list (also printed for debugging).
fn parse(input: &str) -> Vec<Node> {
let mut tokenizer = Tokenizer::new(input);
tokenizer.scan().unwrap();
let mut parser = Parser::new(tokenizer.tokens);
parser.parse().unwrap();
println!("{:#?}", parser.ast);
parser.ast
}
#[test]
fn test_parser() {
// Smoke test only: checks that a mixed expression parses without error;
// the resulting tree is not inspected.
parse("-4 + (2 * 3) % 5 + (2 + 2) * 2");
// Multiplication must bind tighter than addition.
assert_eq!(
parse("1 + 2 * 3"),
vec![Node::BinaryExpr {
left: Box::new(Node::Integer(1)),
op: BinaryOp::Add,
right: Box::new(Node::BinaryExpr {
left: Box::new(Node::Integer(2)),
op: BinaryOp::Multiply,
right: Box::new(Node::Integer(3)),
}),
}]
);
}
}

View file

@ -0,0 +1,60 @@
use std::fmt::Display;
/// Arithmetic operator tokens.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ArithmeticOp {
/// `+`
Add,
/// `-`
Subtract,
/// `*`
Multiply,
/// `/`
Divide,
/// `%`
Modulus,
}
/// Logical operator tokens.
///
/// NOTE(review): no visible code produces these tokens yet; their source
/// spelling is not defined in this change.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum LogicalOp {
And,
Or,
Not,
}
/// Comparison operator tokens.
///
/// NOTE(review): no visible code produces these tokens yet; their source
/// spelling is not defined in this change.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ComparisonOp {
Equal,
NotEqual,
LessThan,
GreaterThan,
LessThanOrEqual,
GreaterThanOrEqual,
}
/// Bitwise operator tokens.
///
/// NOTE(review): no visible code produces these tokens yet; their source
/// spelling is not defined in this change.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum BitwiseOp {
And,
Or,
Xor,
Not,
ShiftLeft,
ShiftRight,
}
/// A lexical token of the expression language.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Token {
/// Numeric literal. `value` holds the literal's text exactly as scanned and
/// `is_fractional` records whether that text contains a `.`.
Number { value: String, is_fractional: bool },
/// Arithmetic operator.
ArithmeticOp(ArithmeticOp),
/// Logical operator.
LogicalOp(LogicalOp),
/// Comparison operator.
ComparisonOp(ComparisonOp),
/// Bitwise operator.
BitwiseOp(BitwiseOp),
/// `(`
LeftParen,
/// `)`
RightParen,
}
/// A line/column position within the source text.
///
/// The lexer's cursor starts counting at line 1, column 0.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct SourceLocation {
    /// Line number.
    pub line: usize,
    /// Column within the line.
    pub column: usize,
}

/// Renders the location as `line <line>, column <column>`.
impl Display for SourceLocation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, column } = *self;
        f.write_fmt(format_args!("line {}, column {}", line, column))
    }
}

68
src/util/expression/vm.rs Normal file
View file

@ -0,0 +1,68 @@
/// Instruction set of the expression VM (stack-based, per the opcode comments).
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum VMOpcode {
/// No operation.
NOP,
/// Pops the top value off the stack.
STPOP,
/// Loads the specified variable onto the stack.
STLD(u16),
/// Pushes a number onto the stack.
STPSH(f64),
/// Adds the top (int, int) from the stack.
ADDII,
/// Adds the top (float, float) from the stack.
ADDFF,
/// Subtracts the top (int, int) from the stack.
SUBII,
/// Subtracts the top (float, float) from the stack.
SUBFF,
/// Multiplies the top (int, int) from the stack.
MULII,
/// Multiplies the top (float, float) from the stack.
MULFF,
/// Divides the top (int, int) from the stack.
DIVII,
/// Divides the top (float, float) from the stack.
DIVFF,
/// Modulus of the top (int, int) from the stack.
MODII,
/// Modulus of the top (float, float) from the stack.
MODFF,
/// Calls the function with the specified index, passing 1 argument from the stack.
CALL1(u16),
/// Calls the function with the specified index, passing 2 arguments from the stack.
CALL2(u16),
/// Calls the function with the specified index, passing 3 arguments from the stack.
CALL3(u16),
// Intrinsics
/// `int(f64) -> int`, going by the opcode name.
/// NOTE(review): the previous comment here read `f64(int) -> f64`, which looks
/// swapped with `ININT2FLOAT` -- confirm the intended direction.
INFLOAT2INT,
/// `f64(int) -> f64`, going by the opcode name.
/// NOTE(review): the previous comment here read `int(f64) -> int`, which looks
/// swapped with `INFLOAT2INT` -- confirm the intended direction.
ININT2FLOAT,
/// `frandom() -> f64`
INFRAND,
/// `irandom() -> int`
INIRAND,
/// `floor(f64) -> f64`
INFLOOR,
/// `ceil(f64) -> f64`
INCEIL,
/// `round(f64) -> f64`
INROUND,
/// `abs(f64) -> f64`
INABS,
/// `min(f64, f64) -> f64`
INMIN,
/// `max(f64, f64) -> f64`
INMAX,
/// `sqrt(f64) -> f64`
INSQRT,
/// `cos(f64) -> f64`
INCOS,
/// `sin(f64) -> f64`
INSIN,
}
/// Placeholder for the expression virtual machine; it has no fields or
/// behaviour yet.
pub struct VM {
}

View file

@ -1,4 +1,5 @@
pub mod atom; pub mod atom;
pub mod bitvec; pub mod bitvec;
pub mod browser; pub mod browser;
pub mod expression;
pub mod rng; pub mod rng;