pine/src/main.rs

static LOWEST_PRECEDENCE: usize = 0;
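
// The set of tokens the lexer produces. Integer literals keep a borrowed
// slice of the source text rather than a parsed value.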
#[derive(Debug, Copy, Clone)]
enum Token<'a> {
    IntegerLiteral(&'a str),
    Plus,
    Star,
    Semicolon,
    LeftParen,
    RightParen,
}

impl Token<'_> {
    // Returns the length of the token in bytes, which is used for advancing
    // the cursor in the lexer.
    pub fn len(&self) -> usize {
        return match self {
            Token::IntegerLiteral(s) => s.len(),
            Token::Plus | Token::Semicolon | Token::LeftParen | Token::RightParen | Token::Star => 1,
        };
    }
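
    // Returns the binding power of the token when it is used as a binary
    // operator. Anything that is not an operator maps to LOWEST_PRECEDENCE,
    // which makes the expression parser stop folding.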
    pub fn precedence(&self) -> usize {
        return match self {
            Token::Plus => 1,
            Token::Star => 2,
            _ => LOWEST_PRECEDENCE,
        };
    }
}
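
// The lexer. It walks over the source string, keeping the current byte
// position in `cursor` and at most one peeked-but-not-consumed token in `last`.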
struct Source<'a> {
    source: &'a str,
    cursor: usize,
    last: Option<Token<'a>>,
}

impl<'a> Source<'a> {
    pub fn new(source: &'a str) -> Self {
        return Self {
            source,
            cursor: 0,
            last: None,
        };
    }
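
    // Advances the cursor past any whitespace in front of the next token.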
    fn skip_whitespace(&mut self) {
        let mut chars = self.source[self.cursor..].chars();
        while let Some(c) = chars.next() {
            if c.is_whitespace() {
                self.cursor += c.len_utf8();
            } else {
                return;
            }
        }
    }
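
    // Lexes the token that starts at the current cursor position. The cursor
    // is only advanced past leading whitespace here; consuming the token
    // itself is left to `next`.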
    fn get_next(&mut self) -> Option<Token<'a>> {
        self.skip_whitespace();
        let mut chars = self.source[self.cursor..].chars();
        let token = match chars.next()? {
            '+' => Token::Plus,
            '*' => Token::Star,
            ';' => Token::Semicolon,
            '(' => Token::LeftParen,
            ')' => Token::RightParen,
            c if c.is_ascii_digit() => {
                let start = self.cursor;
                let mut length = c.len_utf8();
                // Keep consuming digits and stop at the first non-digit or at
                // the end of input; propagating None with `?` here would drop
                // a literal that sits at the very end of the source.
                loop {
                    match chars.next() {
                        Some(c) if c.is_ascii_digit() => length += c.len_utf8(),
                        _ => break,
                    };
                };
                Token::IntegerLiteral(&self.source[start..start + length])
            },
            c => todo!("invalid character `{:?}`", c),
        };
        return Some(token);
    }
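
    // Returns the next token and advances the cursor past it, consuming a
    // previously peeked token first if there is one.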
    pub fn next(&mut self) -> Option<Token<'a>> {
        let token = match self.last {
            Some(t) => {
                self.last = None;
                t
            },
            None => self.get_next()?,
        };
        self.cursor += token.len();
        return Some(token);
    }
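
    // Lexes the next token without consuming it and caches it in `last`.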
    pub fn peek(&mut self) -> Option<Token<'a>> {
        // We unwrap with `?` and then wrap the result again because it makes
        // more semantic sense: the Option that get_next returns is not
        // connected to the one that peek returns. In the future we might want
        // to add more sophisticated error handling to that function, and this
        // will make it easier to refactor. It also avoids exposing the lexer's
        // internal state to the user.
        self.last = Some(self.get_next()?);
        return self.last;
    }
}

// Represents a dynamic parsing process; it will be converted into an
// ast::Tree once parsing completes.
struct Parser<'a> {
    source: &'a mut Source<'a>,
}
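
// The expression tree the parser builds: an integer literal, or a binary
// operation holding its operator token and the two operand sub-expressions.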
#[derive(Debug)]
enum Expr<'a> {
    Literal(&'a str),
    // Paren(Box<Expr<'a>>),
    Binary(Token<'a>, Box<Expr<'a>>, Box<Expr<'a>>),
}

// TODO: Add an expect method, which checks whether two tokens are of the same
// kind, without checking the equality of their (possible) inner values.
// One issue is whether it should advance the parser (the more obvious way), or
// leave that up to the caller (less obvious, but maybe useful?). Another issue
// is how the parser should behave when a call to expect fails: should it
// panic, or do we have to introduce more sophisticated error handling? And
// then we would also want to use self.peek()? inside its implementation; is
// that even possible? So many questions. >_<
impl<'a> Parser<'a> {
    pub fn new(source: &'a mut Source<'a>) -> Self {
        return Self {
            source,
        };
    }

    #[must_use = "You should always use the return value of the next method, as it mutates the parser."]
    #[inline(always)]
    fn next(&mut self) -> Option<Token<'a>> {
        return self.source.next();
    }

    #[inline(always)]
    fn peek(&mut self) -> Option<Token<'a>> {
        return self.source.peek();
    }

    // Just a wrapper around self.next which disables the unused_must_use lint,
    // so it can be more useful in some cases (such as when we already know what
    // the following token is, for example from a previous call to self.peek,
    // but we want to advance the parser now and don't care about the result).
    #[allow(unused_must_use)]
    #[inline(always)]
    fn bump(&mut self) {
        self.next();
    }
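
    // Parses an atom: an integer literal or a parenthesized expression.
    // Anything else is a parse error and yields None.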
    fn parse_primary_expr(&mut self) -> Option<Expr<'a>> {
        return match self.next()? {
            Token::IntegerLiteral(s) => Some(Expr::Literal(s)),
            Token::LeftParen => {
                let expr = self.parse_expr(0)?;
                return match self.next()? {
                    // Should we bother keeping the parentheses information in the AST?
                    // Token::RightParen => Some(Expr::Paren(Box::new(expr))),
                    Token::RightParen => Some(expr),
                    _ => None,
                };
            },
            _ => None,
        };
    }
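
    // Precedence climbing: keep folding operators into `lhs` while they bind
    // tighter than `min_precedence`. Recursing with the operator's own
    // precedence makes operators of equal precedence associate to the left.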
    pub fn parse_expr(&mut self, min_precedence: usize) -> Option<Expr<'a>> {
        let mut lhs = self.parse_primary_expr()?;
        loop {
            // Running out of input here is not an error: the expression parsed
            // so far is already complete.
            let token = match self.peek() {
                Some(t) => t,
                None => return Some(lhs),
            };
            let prec = token.precedence();
            if prec <= min_precedence {
                return Some(lhs);
            };
            // NOTE: Don't advance the parser before we make sure that the
            // precedence is correct.
            self.bump();
            let rhs = self.parse_expr(prec)?;
            lhs = Expr::Binary(token, Box::new(lhs), Box::new(rhs));
        };
    }
}

fn main() {
    // let inline_source = "3 + 5 * 7;";
    // let inline_source = "(3 + 5) + 7;";
    let inline_source = "3 + (5 + (7 + 11));";
    let mut source = Source::new(inline_source);
    let mut parser = Parser::new(&mut source);
    eprintln!("{:?}", parser.parse_expr(0));
}
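
// A small sanity check for the precedence handling, using only the items
// defined above: `*` should bind tighter than `+`, so for `3 + 5 * 7;` the
// root of the tree is the `+` node with the `5 * 7` subtree on its right.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn multiplication_binds_tighter_than_addition() {
        let mut source = Source::new("3 + 5 * 7;");
        let mut parser = Parser::new(&mut source);
        let expr = parser
            .parse_expr(LOWEST_PRECEDENCE)
            .expect("expected a parsed expression");
        match expr {
            Expr::Binary(Token::Plus, lhs, rhs) => {
                assert!(matches!(*lhs, Expr::Literal("3")));
                assert!(matches!(*rhs, Expr::Binary(Token::Star, _, _)));
            },
            other => panic!("unexpected tree: {:?}", other),
        }
    }
}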