Add multiplication to the expression parser

Now it also handles expressions such as 11 * 13, 11 + 13 * 17, and even
11 + 13 * 17 + 19. We also introduced a new function, is_binary_operator,
which allows for simpler checking of operators in the expression parser.

We fixed the issue that the parser didn't parse its input exhaustively, i.e.
when given '+17 + 23 + +21' it would return only +17 + 23, which is far
from what we would expect.
This commit is contained in:
Aodhnait Étaín 2021-05-23 08:06:26 +01:00
parent a29dfc413c
commit 262df19aa3

View file

@ -91,7 +91,8 @@ fn main() {
let path = path.unwrap(); let path = path.unwrap();
eprintln!("compiling `{}`", path); eprintln!("compiling `{}`", path);
#[allow(non_upper_case_globals)] const source: &'static str = "+17 + 23 + +21;"; // #[allow(non_upper_case_globals)] const source: &'static str = "+17 + 23 + +21 + 11;";
#[allow(non_upper_case_globals)] const source: &'static str = "11 + 13 * 17 + 19;";
let mut tokens = TokenStream::from(source); let mut tokens = TokenStream::from(source);
let expr = parse_expression(&mut tokens, 0); let expr = parse_expression(&mut tokens, 0);
eprintln!("{:?}", expr); eprintln!("{:?}", expr);
@ -203,20 +204,28 @@ impl Expression {
fn unary_precedence(token: Token) -> Option<usize> { fn unary_precedence(token: Token) -> Option<usize> {
return match token { return match token {
Token::Plus => Some(2), Token::Plus => Some(3),
_ => None, _ => None,
}; };
} }
fn binary_precedence(token: Token) -> Option<usize> { fn binary_precedence(token: Token) -> Option<usize> {
return match token { return match token {
Token::Asterisk => Some(2),
Token::Plus => Some(1), Token::Plus => Some(1),
_ => None, _ => None,
}; };
} }
/// Reports whether `token` is one of the binary operators recognised by the
/// expression parser: `Token::Plus` or `Token::Asterisk`.
///
/// Every other token yields `false`.
fn is_binary_operator(token: Token) -> bool {
    matches!(token, Token::Plus | Token::Asterisk)
}
fn parse_expression<'a, 'b: 'a>(tokens: &'a mut TokenStream<'b>, highest_precedence: usize) -> Option<Expression> { fn parse_expression<'a, 'b: 'a>(tokens: &'a mut TokenStream<'b>, highest_precedence: usize) -> Option<Expression> {
let lhs = match tokens.next()? { let mut lhs = match tokens.next()? {
token @ Token::IntegerLiteral(_) => Expression::Literal(token), token @ Token::IntegerLiteral(_) => Expression::Literal(token),
token => { token => {
if let Some(precedence) = unary_precedence(token) { if let Some(precedence) = unary_precedence(token) {
@ -230,8 +239,8 @@ fn parse_expression<'a, 'b: 'a>(tokens: &'a mut TokenStream<'b>, highest_precede
loop { loop {
let operator = match tokens.peek()? { let operator = match tokens.peek()? {
operator @ Token::Plus => operator, operator if is_binary_operator(operator) => operator,
_ => return None, _ => return Some(lhs),
}; };
let precedence = binary_precedence(operator)?; let precedence = binary_precedence(operator)?;
@ -240,17 +249,17 @@ fn parse_expression<'a, 'b: 'a>(tokens: &'a mut TokenStream<'b>, highest_precede
return Some(lhs); return Some(lhs);
} }
return match operator { if is_binary_operator(operator) {
Token::Plus => {
tokens.next(); tokens.next();
let rhs = parse_expression(tokens, precedence)?; let rhs = parse_expression(tokens, precedence)?;
lhs = Expression::Binary(operator, box lhs, box rhs);
Some(Expression::Binary(operator, box lhs, box rhs)) if tokens.peek().map(is_binary_operator).unwrap_or(false) {
}, continue;
// If it's not a valid operator, then caller can get rest of the input in the token stream }
// it has provided to us. }
_ => Some(lhs),
}; return Some(lhs);
} }
} }
@ -298,6 +307,7 @@ impl<'a> TokenStream<'a> {
let mut chars = self.chars(); let mut chars = self.chars();
let token = match chars.next()? { let token = match chars.next()? {
'*' => Token::Asterisk,
'+' => Token::Plus, '+' => Token::Plus,
';' => Token::Semicolon, ';' => Token::Semicolon,
c if c.is_numeric() => { c if c.is_numeric() => {
@ -378,6 +388,7 @@ impl OffsetStr {
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
enum Token { enum Token {
Plus, Plus,
Asterisk,
Semicolon, Semicolon,
IntegerLiteral(OffsetStr), IntegerLiteral(OffsetStr),
} }
@ -388,6 +399,7 @@ impl std::fmt::Display for Token {
Token::IntegerLiteral(s) => write!(f, "{}", s), Token::IntegerLiteral(s) => write!(f, "{}", s),
token => write!(f, "{}", match token { token => write!(f, "{}", match token {
Token::Plus => "+", Token::Plus => "+",
Token::Asterisk => "*",
Token::Semicolon => ";", Token::Semicolon => ";",
_ => unreachable!(), _ => unreachable!(),
}), }),
@ -398,7 +410,7 @@ impl std::fmt::Display for Token {
impl Token { impl Token {
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
return match self { return match self {
Token::Plus | Token::Semicolon => 1, Token::Plus | Token::Asterisk | Token::Semicolon => 1,
Token::IntegerLiteral(i) => i.length, Token::IntegerLiteral(i) => i.length,
}; };
} }