diff --git a/src/main.rs b/src/main.rs index b31b288..8008898 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,28 +1,31 @@ #![feature(box_syntax)] +static LOWEST_PRECEDENCE: usize = 0; + #[derive(Debug, Copy, Clone)] enum Token<'a> { IntegerLiteral(&'a str), Plus, Semicolon, + LeftParen, + RightParen, } impl Token<'_> { + // Returns length of that token in bytes, which is used for advancing the + // cursor in the lexer. pub fn len(&self) -> usize { return match self { Token::IntegerLiteral(s) => s.len(), - Token::Plus | Token::Semicolon => 1, + Token::Plus | Token::Semicolon | Token::LeftParen | Token::RightParen => 1, }; } -} -impl std::fmt::Display for Token<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + pub fn precedence(&self) -> usize { return match self { - Token::IntegerLiteral(i) => write!(f, "{}", i), - Token::Plus => write!(f, "+"), - _ => unreachable!(), - }; + Token::Plus => 1, + _ => LOWEST_PRECEDENCE, + } } } @@ -60,6 +63,8 @@ impl<'a> Source<'a> { let token = match chars.next()? { '+' => Token::Plus, ';' => Token::Semicolon, + '(' => Token::LeftParen, + ')' => Token::RightParen, c if c.is_ascii_digit() => { let start = self.cursor; let mut length = c.len_utf8(); @@ -81,65 +86,107 @@ impl<'a> Source<'a> { pub fn next(&mut self) -> Option> { let token = match self.last { - Some(t) => t, + Some(t) => { + self.last = None; + t + }, None => self.get_next()?, }; - self.last = None; self.cursor += token.len(); return Some(token); } pub fn peek(&mut self) -> Option> { + // We unwrap and then wrap it again as it makes more semantic sense, since + // an Option that get_next returns is not connected to what peek returns. + // In future we might want to add more sophisticated error handling to that + // function, and then it will get easier to refactor. More so, we avoid + // exposing lexer's internal state to the user. self.last = Some(self.get_next()?); return self.last; } } -#[derive(Debug)] -enum Statement<'a> { - Expression(Expression<'a>), -} - -// statement = expression ';' . -fn parse_statement<'a>(source: &mut Source<'a>) -> Option> { - let expression = match source.peek()? { - Token::IntegerLiteral(_) => parse_expression(source)?, - _ => return None, - }; - - return match source.next()? { - Token::Semicolon => Some(Statement::Expression(expression)), - _ => None, - }; +// Represents a dynamic parsing process, will get converted to ast::Tree after +// it completes. +struct Parser<'a> { + source: &'a mut Source<'a>, } #[derive(Debug)] -enum Expression<'a> { +enum Expr<'a> { Literal(&'a str), - Binary(Token<'a>, Box>, Box>), + Paren(Box>), + Binary(Token<'a>, Box>, Box>), } -// expression = literal | expression '+' expression . -fn parse_expression<'a>(source: &mut Source<'a>) -> Option> { - let lhs = match source.next()? { - Token::IntegerLiteral(i) => Expression::Literal(i), - _ => return None, - }; +impl<'a> Parser<'a> { + pub fn new(source: &'a mut Source<'a>) -> Self { + return Self { + source, + }; + } - let operator = match source.peek()? { - token @ Token::Plus => token, - Token::Semicolon => return Some(lhs), - _ => return None, - }; - source.next(); + #[inline(always)] + fn next(&mut self) -> Option> { + return self.source.next(); + } - let rhs = parse_expression(source)?; - return Some(Expression::Binary(operator, box lhs, box rhs)); + #[inline(always)] + fn peek(&mut self) -> Option> { + return self.source.peek(); + } + + #[inline(always)] + fn bump(&mut self) { + self.next(); + } + + fn parse_unary_expr(&mut self) -> Option> { + return match self.next()? { + Token::IntegerLiteral(s) => Some(Expr::Literal(s)), + Token::LeftParen => { + let expr = self.parse_expr(0)?; + + return match self.next()? { + Token::RightParen => Some(Expr::Paren(box expr)), + _ => None, + }; + }, + _ => None, + }; + } + + pub fn parse_expr(&mut self, min_precedence: usize) -> Option> { + let mut lhs = self.parse_unary_expr()?; + + loop { + match self.peek()? { + token @ Token::Plus => { + let prec = token.precedence(); + if prec <= min_precedence { + return Some(lhs); + }; + + // Don't advance the parser before we make sure that the precedence + // is correct. + self.bump(); + + let rhs = self.parse_expr(prec)?; + lhs = Expr::Binary(token, box lhs, box rhs); + }, + _ => return Some(lhs), + }; + } + } } fn main() { - let inline_source = "3 + 5 + 7;"; + // let inline_source = "3 + 5 + 7;"; + // let inline_source = "(3 + 5) + 7;"; + let inline_source = "3 + (5 + (7 + 11));"; let mut source = Source::new(inline_source); - eprintln!("{:?}", parse_statement(&mut source)); + let mut parser = Parser::new(&mut source); + eprintln!("{:?}", parser.parse_expr(0)); }