Compare commits

...

4 commits

Author SHA1 Message Date
Aodhnait Étaín 569391612a
Add comment parsing to the lexer
Now lexer can properly parse and skip comments, as well as any
other combination of whitespace and comments.
2021-05-28 12:59:55 +00:00
Aodhnait Étaín 48f47671cd
Add minus operator
Adds "minus" operator, both as a unary negation and a binary
subtraction. Refactors Parser::bump to actually return the next
token, which is slightly more useful for us. Also changes how
Token::len is implemented, and now we return 1 for all tokens
that are not listed explicitly.
2021-05-27 17:40:36 +00:00
Aodhnait Étaín 56fffd6911
Add unary expressions
Adds parsing of unary plus to the parser.
2021-05-27 15:55:35 +00:00
Aodhnait Étaín 155325e78c
Add multiplication operator
Can now parse a multiplication operator with the correct precedence.
Parentheses are no longer included in the AST output.

Binary operator parser has been simplified to avoid redundancy, and to
avoid having to manually provide every token that can be a binary
operator.
2021-05-27 12:26:26 +00:00

View file

@ -6,6 +6,8 @@ static LOWEST_PRECEDENCE: usize = 0;
enum Token<'a> {
IntegerLiteral(&'a str),
Plus,
Star,
Minus,
Semicolon,
LeftParen,
RightParen,
@ -17,13 +19,15 @@ impl Token<'_> {
/// Returns the length of this token's text.
///
/// An integer literal reports the byte length of the source slice it
/// carries; every other token reports 1.
pub fn len(&self) -> usize {
return match self {
Token::IntegerLiteral(s) => s.len(),
Token::Plus | Token::Semicolon | Token::LeftParen | Token::RightParen => 1,
// Token::Plus, Token::Minus, Token::Semicolon, Token::LeftParen, Token::RightParen, Token::Star
_ => 1,
};
}
/// Returns the precedence of this token when it is used as a binary operator.
///
/// `Plus` and `Minus` share level 1 and `Star` is level 2; any other token
/// yields `LOWEST_PRECEDENCE`.
pub fn precedence(&self) -> usize {
return match self {
Token::Plus => 1,
Token::Plus | Token::Minus => 1,
Token::Star => 2,
_ => LOWEST_PRECEDENCE,
}
}
@ -44,24 +48,52 @@ impl<'a> Source<'a> {
};
}
fn skip_whitespace(&mut self) {
fn skip_whitespace(&mut self) -> bool {
let mut chars = self.source[self.cursor..].chars();
let mut skipped = false;
while let Some(c) = chars.next() {
if c.is_whitespace() {
self.cursor += c.len_utf8();
skipped = true;
} else {
return;
return skipped;
}
};
return skipped;
}
/// Skips a single `//` line comment that starts exactly at the cursor.
///
/// Returns `true` when a comment was consumed and `false` when the cursor is
/// not positioned on a comment, in which case the cursor is left untouched.
/// The terminating newline, if there is one, is consumed together with the
/// comment.
fn skip_comments(&mut self) -> bool {
    let rest = &self.source[self.cursor..];
    if !rest.starts_with("//") {
        return false;
    }
    // A comment on the last line may end at end of input without a newline;
    // it is still a consumed comment, so report it as skipped.
    self.cursor += match rest.find('\n') {
        Some(newline) => newline + 1,
        None => rest.len(),
    };
    return true;
}
fn get_next(&mut self) -> Option<Token<'a>> {
self.skip_whitespace();
let mut chars = self.source[self.cursor..].chars();
// Skip all possible comments and whitespace.
while self.skip_comments() || self.skip_whitespace() { };
let mut chars = self.source[self.cursor..].chars().peekable();
let token = match chars.next()? {
'+' => Token::Plus,
'-' => Token::Minus,
'*' => Token::Star,
';' => Token::Semicolon,
'(' => Token::LeftParen,
')' => Token::RightParen,
@ -117,10 +149,19 @@ struct Parser<'a> {
/// Expression tree built by the parser.
#[derive(Debug)]
enum Expr<'a> {
/// An integer literal, stored as the string slice carried by `Token::IntegerLiteral`.
Literal(&'a str),
Paren(Box<Expr<'a>>),
// Paren(Box<Expr<'a>>),
/// A prefix operator token applied to a single operand.
Unary(Token<'a>, Box<Expr<'a>>),
/// A binary operator token with its left-hand and right-hand operands, in that order.
Binary(Token<'a>, Box<Expr<'a>>, Box<Expr<'a>>),
}
// TODO: Add expect method, which checks for whether two tokens are of the same
// kind, without checking for the equality of their (possible) inner values.
// The issue is if it should advance the parser (the more obvious way), or make
// it up to caller to do it (less obvious, but maybe useful?). The other issue
// is how should the parser behave on failed call to expect — should it panic,
// or maybe we have to introduce more sophisticated error handling? And then
// we also would want to use self.peek()? inside its implementation, is it even
// possible? So many questions. >_<
impl<'a> Parser<'a> {
pub fn new(source: &'a mut Source<'a>) -> Self {
return Self {
@ -128,6 +169,7 @@ impl<'a> Parser<'a> {
};
}
#[must_use = "You should always use the return value of the next method, as it mutates the parser."]
#[inline(always)]
fn next(&mut self) -> Option<Token<'a>> {
return self.source.next();
@ -138,19 +180,24 @@ impl<'a> Parser<'a> {
return self.source.peek();
}
// A wrapper around next function, which WILL panic if there is no token left
// in the source. The intent is to ONLY use it when it is KNOWN that we can
// safely take a token without causing a panic, such as when we peek a token.
#[inline(always)]
fn bump(&mut self) {
self.next();
fn bump(&mut self) -> Token<'a> {
return self.next().unwrap();
}
fn parse_unary_expr(&mut self) -> Option<Expr<'a>> {
fn parse_primary_expr(&mut self) -> Option<Expr<'a>> {
return match self.next()? {
Token::IntegerLiteral(s) => Some(Expr::Literal(s)),
Token::LeftParen => {
let expr = self.parse_expr(0)?;
return match self.next()? {
Token::RightParen => Some(Expr::Paren(box expr)),
// Should we bother keeping the parentheses information in the AST?
// Token::RightParen => Some(Expr::Paren(box expr)),
Token::RightParen => Some(expr),
_ => None,
};
},
@ -158,34 +205,48 @@ impl<'a> Parser<'a> {
};
}
/// Parses any run of prefix `+` / `-` operators followed by a primary expression.
///
/// Returns `None` when no token is left to peek, or when the operand fails
/// to parse.
fn parse_unary_expr(&mut self) -> Option<Expr<'a>> {
    // Anything that is not a prefix operator belongs to the primary parser.
    if !matches!(self.peek()?, Token::Plus | Token::Minus) {
        return self.parse_primary_expr();
    }
    // The peek above already saw the operator, so taking it cannot fail.
    let operator = self.bump();
    let operand = self.parse_unary_expr()?;
    return Some(Expr::Unary(operator, box operand));
}
// expr = unary_expr [ op expr ]
// unary_expr = ('+' | '-') unary_expr | primary_expr
// primary_expr = literal | '(' expr ')'
pub fn parse_expr(&mut self, min_precedence: usize) -> Option<Expr<'a>> {
let mut lhs = self.parse_unary_expr()?;
loop {
match self.peek()? {
token @ Token::Plus => {
let token = self.peek()?;
let prec = token.precedence();
if prec <= min_precedence {
return Some(lhs);
};
// Don't advance the parser before we make sure that the precedence
// is correct.
// NOTE: Don't advance the parser before we make sure that the
// precedence is correct.
self.bump();
let rhs = self.parse_expr(prec)?;
lhs = Expr::Binary(token, box lhs, box rhs);
},
_ => return Some(lhs),
};
}
}
}
fn main() {
// let inline_source = "3 + 5 + 7;";
let inline_source = " // hello, world\n 3;";
// let inline_source = "3 + 2 - -5;";
// let inline_source = "3 + +5 * +7;";
// let inline_source = "3 + 5 * 7;";
// let inline_source = "(3 + 5) + 7;";
let inline_source = "3 + (5 + (7 + 11));";
// let inline_source = "3 + (5 + (7 + 11));";
let mut source = Source::new(inline_source);
let mut parser = Parser::new(&mut source);
eprintln!("{:?}", parser.parse_expr(0));