Add parenthesized expression parsing

Another rewrite, this time to simplify the architecture. It also
adds parsing for simple binary expressions and parenthesized expressions,
which was harder to do in the previous version.
This commit is contained in:
Aodhnait Étaín 2021-05-26 20:19:14 +01:00
parent 4e339b1f6e
commit 6e7b4d8319

View file

@ -1,28 +1,31 @@
#![feature(box_syntax)] #![feature(box_syntax)]
static LOWEST_PRECEDENCE: usize = 0;
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
enum Token<'a> { enum Token<'a> {
IntegerLiteral(&'a str), IntegerLiteral(&'a str),
Plus, Plus,
Semicolon, Semicolon,
LeftParen,
RightParen,
} }
impl Token<'_> { impl Token<'_> {
// Returns the length of the token in bytes, which is used for advancing the
// cursor in the lexer.
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
return match self { return match self {
Token::IntegerLiteral(s) => s.len(), Token::IntegerLiteral(s) => s.len(),
Token::Plus | Token::Semicolon => 1, Token::Plus | Token::Semicolon | Token::LeftParen | Token::RightParen => 1,
}; };
} }
}
impl std::fmt::Display for Token<'_> { pub fn precedence(&self) -> usize {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
return match self { return match self {
Token::IntegerLiteral(i) => write!(f, "{}", i), Token::Plus => 1,
Token::Plus => write!(f, "+"), _ => LOWEST_PRECEDENCE,
_ => unreachable!(), }
};
} }
} }
@ -60,6 +63,8 @@ impl<'a> Source<'a> {
let token = match chars.next()? { let token = match chars.next()? {
'+' => Token::Plus, '+' => Token::Plus,
';' => Token::Semicolon, ';' => Token::Semicolon,
'(' => Token::LeftParen,
')' => Token::RightParen,
c if c.is_ascii_digit() => { c if c.is_ascii_digit() => {
let start = self.cursor; let start = self.cursor;
let mut length = c.len_utf8(); let mut length = c.len_utf8();
@ -81,65 +86,107 @@ impl<'a> Source<'a> {
pub fn next(&mut self) -> Option<Token<'a>> { pub fn next(&mut self) -> Option<Token<'a>> {
let token = match self.last { let token = match self.last {
Some(t) => t, Some(t) => {
self.last = None;
t
},
None => self.get_next()?, None => self.get_next()?,
}; };
self.last = None;
self.cursor += token.len(); self.cursor += token.len();
return Some(token); return Some(token);
} }
pub fn peek(&mut self) -> Option<Token<'a>> { pub fn peek(&mut self) -> Option<Token<'a>> {
// We unwrap and then wrap it again as it makes more semantic sense, since
// the Option that get_next returns is not connected to what peek returns.
// In the future we might want to add more sophisticated error handling to that
// function, and then it will be easier to refactor. Moreover, we avoid
// exposing the lexer's internal state to the user.
self.last = Some(self.get_next()?); self.last = Some(self.get_next()?);
return self.last; return self.last;
} }
} }
#[derive(Debug)] // Represents a dynamic parsing process, will get converted to ast::Tree after
enum Statement<'a> { // it completes.
Expression(Expression<'a>), struct Parser<'a> {
} source: &'a mut Source<'a>,
// statement = expression ';' .
fn parse_statement<'a>(source: &mut Source<'a>) -> Option<Statement<'a>> {
let expression = match source.peek()? {
Token::IntegerLiteral(_) => parse_expression(source)?,
_ => return None,
};
return match source.next()? {
Token::Semicolon => Some(Statement::Expression(expression)),
_ => None,
};
} }
#[derive(Debug)] #[derive(Debug)]
enum Expression<'a> { enum Expr<'a> {
Literal(&'a str), Literal(&'a str),
Binary(Token<'a>, Box<Expression<'a>>, Box<Expression<'a>>), Paren(Box<Expr<'a>>),
Binary(Token<'a>, Box<Expr<'a>>, Box<Expr<'a>>),
} }
// expression = literal | expression '+' expression . impl<'a> Parser<'a> {
fn parse_expression<'a>(source: &mut Source<'a>) -> Option<Expression<'a>> { pub fn new(source: &'a mut Source<'a>) -> Self {
let lhs = match source.next()? { return Self {
Token::IntegerLiteral(i) => Expression::Literal(i), source,
_ => return None, };
}; }
let operator = match source.peek()? { #[inline(always)]
token @ Token::Plus => token, fn next(&mut self) -> Option<Token<'a>> {
Token::Semicolon => return Some(lhs), return self.source.next();
_ => return None, }
};
source.next();
let rhs = parse_expression(source)?; #[inline(always)]
return Some(Expression::Binary(operator, box lhs, box rhs)); fn peek(&mut self) -> Option<Token<'a>> {
return self.source.peek();
}
// Consumes the next token and discards it. Intended for use after `peek`
// has already revealed which token is coming.
#[inline(always)]
fn bump(&mut self) {
    let _ = self.next();
}
// Parses a single operand: either an integer literal or a parenthesized
// expression.
//
// unary = literal | '(' expression ')' .
//
// Returns None when the token stream is exhausted, when the next token
// cannot start an operand, or when an opening parenthesis is not followed
// by an expression and a matching ')'.
fn parse_unary_expr(&mut self) -> Option<Expr<'a>> {
    match self.next()? {
        Token::IntegerLiteral(digits) => Some(Expr::Literal(digits)),
        Token::LeftParen => {
            // Precedence restarts from the lowest level inside parentheses.
            let inner = self.parse_expr(0)?;
            if let Token::RightParen = self.next()? {
                Some(Expr::Paren(box inner))
            } else {
                None
            }
        }
        _ => None,
    }
}
// Parses a binary expression using precedence climbing.
//
// expression = unary { '+' unary } .
//
// `min_precedence` is the binding power of the operator to the left of this
// sub-expression. An operator whose precedence is less than or equal to it
// is left unconsumed for the caller, which makes operators of equal
// precedence left-associative.
//
// Returns None if an operand cannot be parsed. Reaching the end of input
// after a complete operand is not an error: the expression parsed so far is
// returned, exactly as when any other non-operator token follows.
pub fn parse_expr(&mut self, min_precedence: usize) -> Option<Expr<'a>> {
    let mut lhs = self.parse_unary_expr()?;

    // Keep folding `lhs <op> rhs` while the upcoming token is a binary
    // operator. Anything else, including running out of tokens, ends the
    // expression.
    while let Some(token @ Token::Plus) = self.peek() {
        let prec = token.precedence();
        if prec <= min_precedence {
            break;
        }

        // Don't advance the parser before we make sure that the precedence
        // is correct.
        self.bump();
        let rhs = self.parse_expr(prec)?;
        lhs = Expr::Binary(token, box lhs, box rhs);
    }

    return Some(lhs);
}
} }
fn main() { fn main() {
let inline_source = "3 + 5 + 7;"; // let inline_source = "3 + 5 + 7;";
// let inline_source = "(3 + 5) + 7;";
let inline_source = "3 + (5 + (7 + 11));";
let mut source = Source::new(inline_source); let mut source = Source::new(inline_source);
eprintln!("{:?}", parse_statement(&mut source)); let mut parser = Parser::new(&mut source);
eprintln!("{:?}", parser.parse_expr(0));
} }