Add parenthesized expression parsing

Another rewrite, this time aiming for a simpler architecture. It also
adds parsing for simple binary expressions and parenthesized expressions,
which were harder to implement in the previous version.
This commit is contained in:
Aodhnait Étaín 2021-05-26 20:19:14 +01:00
parent 4e339b1f6e
commit 6e7b4d8319

View file

@ -1,28 +1,31 @@
#![feature(box_syntax)]
// Precedence reported by `Token::precedence` for every token other than
// `Token::Plus`.
static LOWEST_PRECEDENCE: usize = 0;
// A single lexical token. The only variant with a payload borrows its text
// for the lifetime 'a.
#[derive(Debug, Copy, Clone)]
enum Token<'a> {
// An integer literal, carrying the literal's text.
IntegerLiteral(&'a str),
// The '+' character.
Plus,
// The ';' character.
Semicolon,
// The '(' character.
LeftParen,
// The ')' character.
RightParen,
}
impl Token<'_> {
// Returns length of that token in bytes, which is used for advancing the
// cursor in the lexer.
pub fn len(&self) -> usize {
return match self {
// A literal is exactly as long as the text it carries.
Token::IntegerLiteral(s) => s.len(),
// Each punctuation token is a single ASCII character, hence one byte.
Token::Plus | Token::Semicolon => 1,
Token::Plus | Token::Semicolon | Token::LeftParen | Token::RightParen => 1,
};
}
}
impl std::fmt::Display for Token<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Precedence of this token: `Token::Plus` yields 1, every other token yields
// LOWEST_PRECEDENCE.
pub fn precedence(&self) -> usize {
return match self {
// Formatting arms: an integer literal is written as its own text and
// `Token::Plus` as "+"; the remaining variants are marked unreachable.
Token::IntegerLiteral(i) => write!(f, "{}", i),
Token::Plus => write!(f, "+"),
_ => unreachable!(),
};
// Precedence arms.
Token::Plus => 1,
_ => LOWEST_PRECEDENCE,
}
}
}
@ -60,6 +63,8 @@ impl<'a> Source<'a> {
let token = match chars.next()? {
'+' => Token::Plus,
';' => Token::Semicolon,
'(' => Token::LeftParen,
')' => Token::RightParen,
c if c.is_ascii_digit() => {
let start = self.cursor;
let mut length = c.len_utf8();
@ -81,65 +86,107 @@ impl<'a> Source<'a> {
// Hands out the next token and moves the cursor past it. A token already held
// in `self.last` is used first; otherwise a fresh one is requested from
// `get_next`, and a `None` from `get_next` is propagated to the caller.
pub fn next(&mut self) -> Option<Token<'a>> {
let token = match self.last {
Some(t) => t,
Some(t) => {
// The held token is being handed out, so forget it.
self.last = None;
t
},
None => self.get_next()?,
};
self.last = None;
// Advance by the token's length in bytes.
self.cursor += token.len();
return Some(token);
}
// Looks at the upcoming token and remembers it in `self.last`, so that the
// following `next` call can hand it out. A `None` from `get_next` is returned
// as-is and leaves `self.last` untouched.
pub fn peek(&mut self) -> Option<Token<'a>> {
    // The Option coming out of get_next is deliberately taken apart and built
    // again: semantically it is unrelated to the Option that peek returns.
    // Should get_next grow richer error handling later, only this spot needs
    // refactoring, and the lexer's internal state stays hidden from the user.
    let upcoming = self.get_next()?;
    self.last = Some(upcoming);
    self.last
}
}
// A parsed statement.
#[derive(Debug)]
enum Statement<'a> {
// A statement consisting of a single expression.
Expression(Expression<'a>),
}
// statement = expression ';' .
fn parse_statement<'a>(source: &mut Source<'a>) -> Option<Statement<'a>> {
let expression = match source.peek()? {
Token::IntegerLiteral(_) => parse_expression(source)?,
_ => return None,
};
return match source.next()? {
Token::Semicolon => Some(Statement::Expression(expression)),
_ => None,
};
// Represents a dynamic parsing process, will get converted to ast::Tree after
// it completes.
struct Parser<'a> {
// Token source the parser pulls from; the parser's `next` and `peek` forward
// to it.
source: &'a mut Source<'a>,
}
#[derive(Debug)]
enum Expression<'a> {
enum Expr<'a> {
// An integer literal, kept as the text carried by its token.
Literal(&'a str),
Binary(Token<'a>, Box<Expression<'a>>, Box<Expression<'a>>),
// An expression that was wrapped in parentheses.
Paren(Box<Expr<'a>>),
// A binary operation: the operator token, then the left and right operands.
Binary(Token<'a>, Box<Expr<'a>>, Box<Expr<'a>>),
}
// expression = literal | expression '+' expression .
fn parse_expression<'a>(source: &mut Source<'a>) -> Option<Expression<'a>> {
let lhs = match source.next()? {
Token::IntegerLiteral(i) => Expression::Literal(i),
_ => return None,
};
impl<'a> Parser<'a> {
// Builds a parser that reads its tokens from `source`.
pub fn new(source: &'a mut Source<'a>) -> Self {
    Self { source }
}
let operator = match source.peek()? {
token @ Token::Plus => token,
Token::Semicolon => return Some(lhs),
_ => return None,
};
source.next();
// Consumes one token by forwarding to the underlying source's `next`.
#[inline(always)]
fn next(&mut self) -> Option<Token<'a>> {
    self.source.next()
}
let rhs = parse_expression(source)?;
return Some(Expression::Binary(operator, box lhs, box rhs));
// Looks at the upcoming token by forwarding to the underlying source's `peek`.
#[inline(always)]
fn peek(&mut self) -> Option<Token<'a>> {
    self.source.peek()
}
// Consumes one token and throws the result away.
#[inline(always)]
fn bump(&mut self) {
    let _ = self.next();
}
// Parses an operand: either an integer literal or a parenthesized expression.
// Returns None when the tokens run out, when the first token is neither a
// literal nor '(', or when the parenthesized expression is not followed by ')'.
fn parse_unary_expr(&mut self) -> Option<Expr<'a>> {
    let first = self.next()?;

    if let Token::IntegerLiteral(text) = first {
        return Some(Expr::Literal(text));
    }
    if !matches!(first, Token::LeftParen) {
        return None;
    }

    // Inside the parentheses precedence starts over from zero.
    let inner = self.parse_expr(0)?;
    match self.next()? {
        Token::RightParen => Some(Expr::Paren(Box::new(inner))),
        _ => None,
    }
}
// Parses an operand followed by any number of `+ operand` tails, folding them
// into nested Expr::Binary nodes. An operator is only taken when its
// precedence is strictly greater than `min_precedence`; because the recursive
// call passes the operator's own precedence, a chain of '+' groups to the left.
// Returns None when an operand fails to parse or when `peek` yields None.
pub fn parse_expr(&mut self, min_precedence: usize) -> Option<Expr<'a>> {
    let mut left = self.parse_unary_expr()?;

    loop {
        let operator = self.peek()?;
        if !matches!(operator, Token::Plus) {
            return Some(left);
        }

        let binding = operator.precedence();
        if binding <= min_precedence {
            return Some(left);
        }

        // The operator is consumed only now, after the precedence check has
        // passed, so a rejected operator stays available to the caller.
        self.bump();
        let right = self.parse_expr(binding)?;
        left = Expr::Binary(operator, Box::new(left), Box::new(right));
    }
}
}
// Entry point: builds a `Source` over a hard-coded sample input and prints the
// Debug representation of the parse result to stderr.
fn main() {
let inline_source = "3 + 5 + 7;";
// let inline_source = "3 + 5 + 7;";
// let inline_source = "(3 + 5) + 7;";
// Nested parentheses exercise the parenthesized-expression path.
let inline_source = "3 + (5 + (7 + 11));";
let mut source = Source::new(inline_source);
eprintln!("{:?}", parse_statement(&mut source));
// Parse with a minimum precedence of 0.
let mut parser = Parser::new(&mut source);
eprintln!("{:?}", parser.parse_expr(0));
}