Compare commits
4 commits
6e7b4d8319
...
trunk
Author | SHA1 | Date | |
---|---|---|---|
|
569391612a | ||
|
48f47671cd | ||
|
56fffd6911 | ||
|
155325e78c |
119
src/main.rs
119
src/main.rs
|
@ -6,6 +6,8 @@ static LOWEST_PRECEDENCE: usize = 0;
|
||||||
enum Token<'a> {
|
enum Token<'a> {
|
||||||
IntegerLiteral(&'a str),
|
IntegerLiteral(&'a str),
|
||||||
Plus,
|
Plus,
|
||||||
|
Star,
|
||||||
|
Minus,
|
||||||
Semicolon,
|
Semicolon,
|
||||||
LeftParen,
|
LeftParen,
|
||||||
RightParen,
|
RightParen,
|
||||||
|
@ -17,13 +19,15 @@ impl Token<'_> {
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
return match self {
|
return match self {
|
||||||
Token::IntegerLiteral(s) => s.len(),
|
Token::IntegerLiteral(s) => s.len(),
|
||||||
Token::Plus | Token::Semicolon | Token::LeftParen | Token::RightParen => 1,
|
// Token::Plus, Token::Minus, Token::Semicolon, Token::LeftParen, Token::RightParen, Token::Star
|
||||||
|
_ => 1,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn precedence(&self) -> usize {
|
pub fn precedence(&self) -> usize {
|
||||||
return match self {
|
return match self {
|
||||||
Token::Plus => 1,
|
Token::Plus | Token::Minus => 1,
|
||||||
|
Token::Star => 2,
|
||||||
_ => LOWEST_PRECEDENCE,
|
_ => LOWEST_PRECEDENCE,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -44,24 +48,52 @@ impl<'a> Source<'a> {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
fn skip_whitespace(&mut self) {
|
fn skip_whitespace(&mut self) -> bool {
|
||||||
let mut chars = self.source[self.cursor..].chars();
|
let mut chars = self.source[self.cursor..].chars();
|
||||||
|
let mut skipped = false;
|
||||||
|
|
||||||
while let Some(c) = chars.next() {
|
while let Some(c) = chars.next() {
|
||||||
if c.is_whitespace() {
|
if c.is_whitespace() {
|
||||||
self.cursor += c.len_utf8();
|
self.cursor += c.len_utf8();
|
||||||
|
skipped = true;
|
||||||
} else {
|
} else {
|
||||||
return;
|
return skipped;
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
|
return skipped;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn skip_comments(&mut self) -> bool {
|
||||||
|
let mut chars = self.source[self.cursor..].chars();
|
||||||
|
|
||||||
|
if let Some('/') = chars.next() {
|
||||||
|
if let Some('/') = chars.next() {
|
||||||
|
self.cursor += 2;
|
||||||
|
|
||||||
|
while let Some(c) = chars.next() {
|
||||||
|
if c == '\n' {
|
||||||
|
self.cursor += 1;
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
self.cursor += c.len_utf8();
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_next(&mut self) -> Option<Token<'a>> {
|
fn get_next(&mut self) -> Option<Token<'a>> {
|
||||||
self.skip_whitespace();
|
// Skip all possible comments and whitespace.
|
||||||
let mut chars = self.source[self.cursor..].chars();
|
while self.skip_comments() || self.skip_whitespace() { };
|
||||||
|
let mut chars = self.source[self.cursor..].chars().peekable();
|
||||||
|
|
||||||
let token = match chars.next()? {
|
let token = match chars.next()? {
|
||||||
'+' => Token::Plus,
|
'+' => Token::Plus,
|
||||||
|
'-' => Token::Minus,
|
||||||
|
'*' => Token::Star,
|
||||||
';' => Token::Semicolon,
|
';' => Token::Semicolon,
|
||||||
'(' => Token::LeftParen,
|
'(' => Token::LeftParen,
|
||||||
')' => Token::RightParen,
|
')' => Token::RightParen,
|
||||||
|
@ -117,10 +149,19 @@ struct Parser<'a> {
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum Expr<'a> {
|
enum Expr<'a> {
|
||||||
Literal(&'a str),
|
Literal(&'a str),
|
||||||
Paren(Box<Expr<'a>>),
|
// Paren(Box<Expr<'a>>),
|
||||||
|
Unary(Token<'a>, Box<Expr<'a>>),
|
||||||
Binary(Token<'a>, Box<Expr<'a>>, Box<Expr<'a>>),
|
Binary(Token<'a>, Box<Expr<'a>>, Box<Expr<'a>>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Add an expect method, which checks whether two tokens are of the same
|
||||||
|
// kind, without comparing their (possible) inner values.
|
||||||
|
// The first issue is whether it should advance the parser (the more obvious way), or leave
|
||||||
|
// it up to the caller to do so (less obvious, but maybe useful?). The other issue
|
||||||
|
// is how the parser should behave on a failed call to expect — should it panic,
|
||||||
|
// or do we have to introduce more sophisticated error handling? And then
|
||||||
|
// we would also want to use self.peek()? inside its implementation; is that even
|
||||||
|
// possible? So many questions. >_<
|
||||||
impl<'a> Parser<'a> {
|
impl<'a> Parser<'a> {
|
||||||
pub fn new(source: &'a mut Source<'a>) -> Self {
|
pub fn new(source: &'a mut Source<'a>) -> Self {
|
||||||
return Self {
|
return Self {
|
||||||
|
@ -128,6 +169,7 @@ impl<'a> Parser<'a> {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[must_use = "You should always use the return value of the next method, as it mutates the parser."]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn next(&mut self) -> Option<Token<'a>> {
|
fn next(&mut self) -> Option<Token<'a>> {
|
||||||
return self.source.next();
|
return self.source.next();
|
||||||
|
@ -138,19 +180,24 @@ impl<'a> Parser<'a> {
|
||||||
return self.source.peek();
|
return self.source.peek();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A wrapper around the next function, which WILL panic if there is no token left
|
||||||
|
// in the source. The intent is to ONLY use it when it is KNOWN that we can
|
||||||
|
// safely take a token without causing a panic, such as right after peeking a token.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn bump(&mut self) {
|
fn bump(&mut self) -> Token<'a> {
|
||||||
self.next();
|
return self.next().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_unary_expr(&mut self) -> Option<Expr<'a>> {
|
fn parse_primary_expr(&mut self) -> Option<Expr<'a>> {
|
||||||
return match self.next()? {
|
return match self.next()? {
|
||||||
Token::IntegerLiteral(s) => Some(Expr::Literal(s)),
|
Token::IntegerLiteral(s) => Some(Expr::Literal(s)),
|
||||||
Token::LeftParen => {
|
Token::LeftParen => {
|
||||||
let expr = self.parse_expr(0)?;
|
let expr = self.parse_expr(0)?;
|
||||||
|
|
||||||
return match self.next()? {
|
return match self.next()? {
|
||||||
Token::RightParen => Some(Expr::Paren(box expr)),
|
// Should we bother keeping the parentheses information in the AST?
|
||||||
|
// Token::RightParen => Some(Expr::Paren(box expr)),
|
||||||
|
Token::RightParen => Some(expr),
|
||||||
_ => None,
|
_ => None,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
|
@ -158,34 +205,48 @@ impl<'a> Parser<'a> {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_unary_expr(&mut self) -> Option<Expr<'a>> {
|
||||||
|
return match self.peek()? {
|
||||||
|
Token::Plus | Token::Minus => {
|
||||||
|
let token = self.bump();
|
||||||
|
let expr = self.parse_unary_expr()?;
|
||||||
|
Some(Expr::Unary(token, box expr))
|
||||||
|
},
|
||||||
|
_ => self.parse_primary_expr(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// expr = unary_expr [ op expr ]
|
||||||
|
// unary_expr = ( '+' | '-' ) unary_expr | primary_expr
|
||||||
|
// primary_expr = literal | '(' expr ')'
|
||||||
pub fn parse_expr(&mut self, min_precedence: usize) -> Option<Expr<'a>> {
|
pub fn parse_expr(&mut self, min_precedence: usize) -> Option<Expr<'a>> {
|
||||||
let mut lhs = self.parse_unary_expr()?;
|
let mut lhs = self.parse_unary_expr()?;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match self.peek()? {
|
let token = self.peek()?;
|
||||||
token @ Token::Plus => {
|
let prec = token.precedence();
|
||||||
let prec = token.precedence();
|
|
||||||
if prec <= min_precedence {
|
|
||||||
return Some(lhs);
|
|
||||||
};
|
|
||||||
|
|
||||||
// Don't advance the parser before we make sure that the precedence
|
if prec <= min_precedence {
|
||||||
// is correct.
|
return Some(lhs);
|
||||||
self.bump();
|
|
||||||
|
|
||||||
let rhs = self.parse_expr(prec)?;
|
|
||||||
lhs = Expr::Binary(token, box lhs, box rhs);
|
|
||||||
},
|
|
||||||
_ => return Some(lhs),
|
|
||||||
};
|
};
|
||||||
}
|
|
||||||
|
// NOTE: Don't advance the parser before we make sure that the
|
||||||
|
// precedence is correct.
|
||||||
|
self.bump();
|
||||||
|
|
||||||
|
let rhs = self.parse_expr(prec)?;
|
||||||
|
lhs = Expr::Binary(token, box lhs, box rhs);
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
// let inline_source = "3 + 5 + 7;";
|
let inline_source = " // hello, world\n 3;";
|
||||||
|
// let inline_source = "3 + 2 - -5;";
|
||||||
|
// let inline_source = "3 + +5 * +7;";
|
||||||
|
// let inline_source = "3 + 5 * 7;";
|
||||||
// let inline_source = "(3 + 5) + 7;";
|
// let inline_source = "(3 + 5) + 7;";
|
||||||
let inline_source = "3 + (5 + (7 + 11));";
|
// let inline_source = "3 + (5 + (7 + 11));";
|
||||||
let mut source = Source::new(inline_source);
|
let mut source = Source::new(inline_source);
|
||||||
let mut parser = Parser::new(&mut source);
|
let mut parser = Parser::new(&mut source);
|
||||||
eprintln!("{:?}", parser.parse_expr(0));
|
eprintln!("{:?}", parser.parse_expr(0));
|
||||||
|
|
Loading…
Reference in a new issue