Add unary operator parsing

Now we can also parse unary +, e.g.
  +17,
and also expressions that contain it, e.g.
  +17 + 23.

We also now have a custom Debug implementation for Expression, which
prints expressions in a more useful, s-expression-like syntax, e.g.
  +17 + 23 => (+ +17 23).

We also change the implementation of `TokenStream`'s `next` and
`parse_next` methods so that a `peek` method, which looks at the
(possible) next token without advancing the stream, is easy to implement.
This commit is contained in:
Aodhnait Étaín 2021-05-23 00:28:39 +01:00
parent 9539389e4f
commit 3062ac9f45

View file

@ -91,10 +91,12 @@ fn main() {
let path = path.unwrap();
eprintln!("compiling `{}`", path);
#[allow(non_upper_case_globals)] const source: &'static str = "23 + 21;";
#[allow(non_upper_case_globals)] const source: &'static str = "+17 + 23 + +21;";
let mut tokens = TokenStream::from(source);
let expr = parse_expression(&mut tokens);
let expr = parse_expression(&mut tokens, 0);
eprintln!("{:?}", expr);
// let expr = Some(Expression::Binary(Token::Plus, box Expression::Literal(Token::IntegerLiteral(OffsetStr::from("42"))), box Expression::Unary(Token::Plus, box Expression::Literal(Token::IntegerLiteral(OffsetStr::from("22"))))));
// eprintln!("{:?}", expr);
match output_pretty {
Some("dot" | "graphdotviz") => expr.then(|e| {
@ -107,7 +109,7 @@ fn main() {
// a single bit.
Some(_) => unreachable!(),
None => {},
}
};
}
// Represents a type characterised by a parameter T (either the type itself, or
@ -135,12 +137,22 @@ impl<T> Then<T> for std::option::Option<T> {
}
}
#[derive(Debug)]
/// A node of the parsed expression tree.
enum Expression {
/// A leaf holding a single token; the parser builds these from integer literals.
Literal(Token),
/// A prefix operator token applied to one boxed operand expression.
Unary(Token, Box<Expression>),
/// An operator token together with its boxed left and right operand expressions.
Binary(Token, Box<Expression>, Box<Expression>),
}
impl std::fmt::Debug for Expression {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
return match self {
Expression::Literal(token) => write!(f, "{}", token),
Expression::Unary(token, expr) => write!(f, "{}{:?}", token, expr),
Expression::Binary(token, left, right) => write!(f, "({} {:?} {:?})", token, left, right),
};
}
}
/// A counter holding a single `usize` of state.
// NOTE(review): presumably the type behind `GLOBAL_COUNTER`, whose `next()` is
// used to obtain graph node ids — confirm against the rest of the file.
struct Counter {
// The counter's internal state.
state: usize,
}
@ -168,6 +180,14 @@ impl Expression {
format!("Node{} [label = \"{}\"]", id, i)
},
Expression::Literal(_) => unreachable!(),
Expression::Unary(op, expr) => {
let expr_id = unsafe { GLOBAL_COUNTER.next() };
format!("Node{} -> Node{}\nNode{} [label = \"{}\"]\n{}",
id, expr_id,
id, op,
expr.create_graphviz_graph(expr_id))
},
Expression::Binary(op, left, right) => {
let left_id = unsafe { GLOBAL_COUNTER.next() };
let right_id = unsafe { GLOBAL_COUNTER.next() };
@ -181,22 +201,57 @@ impl Expression {
}
}
fn parse_expression<'a>(tokens: &'a mut TokenStream<'a>) -> Option<Expression> {
/// Returns the precedence of `token` when it is used as a prefix (unary)
/// operator, or `None` if the token is not a unary operator.
///
/// Only `+` is recognised; its unary precedence is 2.
fn unary_precedence(token: Token) -> Option<usize> {
    if let Token::Plus = token {
        Some(2)
    } else {
        None
    }
}
/// Returns the precedence of `token` when it is used as an infix (binary)
/// operator, or `None` if the token is not a binary operator.
///
/// Only `+` is recognised; its binary precedence is 1.
fn binary_precedence(token: Token) -> Option<usize> {
    if let Token::Plus = token {
        Some(1)
    } else {
        None
    }
}
fn parse_expression<'a, 'b: 'a>(tokens: &'a mut TokenStream<'b>, highest_precedence: usize) -> Option<Expression> {
let lhs = match tokens.next()? {
token @ Token::IntegerLiteral(_) => Expression::Literal(token),
_ => return None,
};
return match tokens.next()? {
operator @ Token::Plus => {
let rhs = parse_expression(tokens)?;
Some(Expression::Binary(operator, box lhs, box rhs))
token => {
if let Some(precedence) = unary_precedence(token) {
let expr = parse_expression(tokens, precedence)?;
Expression::Unary(token, box expr)
} else {
return None;
}
},
// If it's not a valid operator, then caller can get rest of the input in the token stream
// it has provided to us.
_ => Some(lhs),
};
loop {
let operator = match tokens.peek()? {
operator @ Token::Plus => operator,
_ => return None,
};
let precedence = binary_precedence(operator)?;
if precedence <= highest_precedence {
return Some(lhs);
}
return match operator {
Token::Plus => {
tokens.next();
let rhs = parse_expression(tokens, precedence)?;
Some(Expression::Binary(operator, box lhs, box rhs))
},
// If it's not a valid operator, then caller can get rest of the input in the token stream
// it has provided to us.
_ => Some(lhs),
};
}
}
struct TokenStream<'a> {
@ -259,11 +314,17 @@ impl<'a> TokenStream<'a> {
c => todo!("character unsupported: `{}`", natural_char_representation(c)),
};
self.cursor += token.len();
return Some(token);
}
/// Skips whitespace, parses the token at the cursor and consumes it by moving
/// the cursor forward by the token's length.
///
/// Returns `None`, leaving the cursor where whitespace skipping left it, when
/// `parse_next` produces no token.
pub fn next(&mut self) -> Option<Token> {
    self.skip_whitespace();
    match self.parse_next() {
        Some(token) => {
            self.cursor += token.len();
            Some(token)
        }
        None => None,
    }
}
/// Skips whitespace and returns whatever `parse_next` yields for the token at
/// the cursor, without adding the token's length to the cursor.
// NOTE(review): this relies on `parse_next` itself not moving the cursor — confirm.
pub fn peek(&mut self) -> Option<Token> {
    self.skip_whitespace();
    self.parse_next()
}