Compare commits

..

No commits in common. "trunk" and "6e7b4d83194aeb168c59675792389d3b22904f73" have entirely different histories.

View file

@ -6,8 +6,6 @@ static LOWEST_PRECEDENCE: usize = 0;
enum Token<'a> { enum Token<'a> {
IntegerLiteral(&'a str), IntegerLiteral(&'a str),
Plus, Plus,
Star,
Minus,
Semicolon, Semicolon,
LeftParen, LeftParen,
RightParen, RightParen,
@ -19,15 +17,13 @@ impl Token<'_> {
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
return match self { return match self {
Token::IntegerLiteral(s) => s.len(), Token::IntegerLiteral(s) => s.len(),
// Token::Plus, Token::Minus, Token::Semicolon, Token::LeftParen, Token::RightParen, Token::Star Token::Plus | Token::Semicolon | Token::LeftParen | Token::RightParen => 1,
_ => 1,
}; };
} }
pub fn precedence(&self) -> usize { pub fn precedence(&self) -> usize {
return match self { return match self {
Token::Plus | Token::Minus => 1, Token::Plus => 1,
Token::Star => 2,
_ => LOWEST_PRECEDENCE, _ => LOWEST_PRECEDENCE,
} }
} }
@ -48,52 +44,24 @@ impl<'a> Source<'a> {
}; };
} }
fn skip_whitespace(&mut self) -> bool { fn skip_whitespace(&mut self) {
let mut chars = self.source[self.cursor..].chars(); let mut chars = self.source[self.cursor..].chars();
let mut skipped = false;
while let Some(c) = chars.next() { while let Some(c) = chars.next() {
if c.is_whitespace() { if c.is_whitespace() {
self.cursor += c.len_utf8(); self.cursor += c.len_utf8();
skipped = true;
} else { } else {
return skipped; return;
} }
};
return skipped;
} }
fn skip_comments(&mut self) -> bool {
let mut chars = self.source[self.cursor..].chars();
if let Some('/') = chars.next() {
if let Some('/') = chars.next() {
self.cursor += 2;
while let Some(c) = chars.next() {
if c == '\n' {
self.cursor += 1;
return true;
};
self.cursor += c.len_utf8();
};
};
};
return false;
} }
fn get_next(&mut self) -> Option<Token<'a>> { fn get_next(&mut self) -> Option<Token<'a>> {
// Skip all possible comments and whitespace. self.skip_whitespace();
while self.skip_comments() || self.skip_whitespace() { }; let mut chars = self.source[self.cursor..].chars();
let mut chars = self.source[self.cursor..].chars().peekable();
let token = match chars.next()? { let token = match chars.next()? {
'+' => Token::Plus, '+' => Token::Plus,
'-' => Token::Minus,
'*' => Token::Star,
';' => Token::Semicolon, ';' => Token::Semicolon,
'(' => Token::LeftParen, '(' => Token::LeftParen,
')' => Token::RightParen, ')' => Token::RightParen,
@ -149,19 +117,10 @@ struct Parser<'a> {
#[derive(Debug)] #[derive(Debug)]
enum Expr<'a> { enum Expr<'a> {
Literal(&'a str), Literal(&'a str),
// Paren(Box<Expr<'a>>), Paren(Box<Expr<'a>>),
Unary(Token<'a>, Box<Expr<'a>>),
Binary(Token<'a>, Box<Expr<'a>>, Box<Expr<'a>>), Binary(Token<'a>, Box<Expr<'a>>, Box<Expr<'a>>),
} }
// TODO: Add an `expect` method that checks whether two tokens are of the same
// kind, without comparing their (possible) inner values.
// One open question is whether it should advance the parser (the more obvious
// way) or leave that up to the caller (less obvious, but maybe useful?). Another
// is how the parser should behave on a failed call to `expect` — should it
// panic, or do we have to introduce more sophisticated error handling? We would
// also want to use `self.peek()?` inside its implementation; is that even
// possible? So many questions. >_<
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
pub fn new(source: &'a mut Source<'a>) -> Self { pub fn new(source: &'a mut Source<'a>) -> Self {
return Self { return Self {
@ -169,7 +128,6 @@ impl<'a> Parser<'a> {
}; };
} }
#[must_use = "You should always use the return value of the next method, as it mutates the parser."]
#[inline(always)] #[inline(always)]
fn next(&mut self) -> Option<Token<'a>> { fn next(&mut self) -> Option<Token<'a>> {
return self.source.next(); return self.source.next();
@ -180,24 +138,19 @@ impl<'a> Parser<'a> {
return self.source.peek(); return self.source.peek();
} }
// A wrapper around the `next` method, which WILL panic if there is no token
// left in the source. It is intended to be used ONLY when it is KNOWN that a
// token can be taken safely without causing a panic, such as right after a
// successful peek.
#[inline(always)] #[inline(always)]
fn bump(&mut self) -> Token<'a> { fn bump(&mut self) {
return self.next().unwrap(); self.next();
} }
fn parse_primary_expr(&mut self) -> Option<Expr<'a>> { fn parse_unary_expr(&mut self) -> Option<Expr<'a>> {
return match self.next()? { return match self.next()? {
Token::IntegerLiteral(s) => Some(Expr::Literal(s)), Token::IntegerLiteral(s) => Some(Expr::Literal(s)),
Token::LeftParen => { Token::LeftParen => {
let expr = self.parse_expr(0)?; let expr = self.parse_expr(0)?;
return match self.next()? { return match self.next()? {
// Should we bother keeping the parentheses information in the AST? Token::RightParen => Some(Expr::Paren(box expr)),
// Token::RightParen => Some(Expr::Paren(box expr)),
Token::RightParen => Some(expr),
_ => None, _ => None,
}; };
}, },
@ -205,48 +158,34 @@ impl<'a> Parser<'a> {
}; };
} }
/// Parses a unary expression: any number of prefix `+` / `-` operators
/// followed by a primary expression.
///
/// Returns `None` when no token is available to peek, or when the operand
/// (or the primary expression) fails to parse.
fn parse_unary_expr(&mut self) -> Option<Expr<'a>> {
    // Only a leading sign makes this a unary node; the token is consumed
    // after the peek has confirmed it is there.
    if let Token::Plus | Token::Minus = self.peek()? {
        let operator = self.bump();
        // Recurse so that chained prefixes nest from the outside in.
        let operand = self.parse_unary_expr()?;
        return Some(Expr::Unary(operator, box operand));
    }

    // Anything else is delegated to the primary-expression parser.
    self.parse_primary_expr()
}
// expr = unary_expr [ op expr ]
// unary_expr = ( '+' | '-' ) unary_expr | primary_expr
// primary_expr = literal | '(' expr ')'
pub fn parse_expr(&mut self, min_precedence: usize) -> Option<Expr<'a>> { pub fn parse_expr(&mut self, min_precedence: usize) -> Option<Expr<'a>> {
let mut lhs = self.parse_unary_expr()?; let mut lhs = self.parse_unary_expr()?;
loop { loop {
let token = self.peek()?; match self.peek()? {
token @ Token::Plus => {
let prec = token.precedence(); let prec = token.precedence();
if prec <= min_precedence { if prec <= min_precedence {
return Some(lhs); return Some(lhs);
}; };
// NOTE: Don't advance the parser before we make sure that the // Don't advance the parser before we make sure that the precedence
// precedence is correct. // is correct.
self.bump(); self.bump();
let rhs = self.parse_expr(prec)?; let rhs = self.parse_expr(prec)?;
lhs = Expr::Binary(token, box lhs, box rhs); lhs = Expr::Binary(token, box lhs, box rhs);
},
_ => return Some(lhs),
}; };
} }
}
} }
fn main() { fn main() {
let inline_source = " // hello, world\n 3;"; // let inline_source = "3 + 5 + 7;";
// let inline_source = "3 + 2 - -5;";
// let inline_source = "3 + +5 * +7;";
// let inline_source = "3 + 5 * 7;";
// let inline_source = "(3 + 5) + 7;"; // let inline_source = "(3 + 5) + 7;";
// let inline_source = "3 + (5 + (7 + 11));"; let inline_source = "3 + (5 + (7 + 11));";
let mut source = Source::new(inline_source); let mut source = Source::new(inline_source);
let mut parser = Parser::new(&mut source); let mut parser = Parser::new(&mut source);
eprintln!("{:?}", parser.parse_expr(0)); eprintln!("{:?}", parser.parse_expr(0));