Add unary operator parsing
Now we can also parse unary +, e.g. +17, as well as expressions that contain it, e.g. +17 + 23. We also now have a custom Debug implementation for Expression, which prints expressions in a more useful, s-expression-like syntax, e.g. +17 + 23 => (+ +17 23). We also change the implementation of `TokenStream`'s `next` and `parse_next` methods so that a `peek` method, which looks at the (possible) next token without advancing the stream, is easy to implement.
This commit is contained in:
parent
9539389e4f
commit
3062ac9f45
95
src/main.rs
95
src/main.rs
|
@ -91,10 +91,12 @@ fn main() {
|
||||||
let path = path.unwrap();
|
let path = path.unwrap();
|
||||||
eprintln!("compiling `{}`", path);
|
eprintln!("compiling `{}`", path);
|
||||||
|
|
||||||
#[allow(non_upper_case_globals)] const source: &'static str = "23 + 21;";
|
#[allow(non_upper_case_globals)] const source: &'static str = "+17 + 23 + +21;";
|
||||||
let mut tokens = TokenStream::from(source);
|
let mut tokens = TokenStream::from(source);
|
||||||
let expr = parse_expression(&mut tokens);
|
let expr = parse_expression(&mut tokens, 0);
|
||||||
eprintln!("{:?}", expr);
|
eprintln!("{:?}", expr);
|
||||||
|
// let expr = Some(Expression::Binary(Token::Plus, box Expression::Literal(Token::IntegerLiteral(OffsetStr::from("42"))), box Expression::Unary(Token::Plus, box Expression::Literal(Token::IntegerLiteral(OffsetStr::from("22"))))));
|
||||||
|
// eprintln!("{:?}", expr);
|
||||||
|
|
||||||
match output_pretty {
|
match output_pretty {
|
||||||
Some("dot" | "graphdotviz") => expr.then(|e| {
|
Some("dot" | "graphdotviz") => expr.then(|e| {
|
||||||
|
@ -107,7 +109,7 @@ fn main() {
|
||||||
// a single bit.
|
// a single bit.
|
||||||
Some(_) => unreachable!(),
|
Some(_) => unreachable!(),
|
||||||
None => {},
|
None => {},
|
||||||
}
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Represents a type characterised by a parameter T (either the type itself, or
|
// Represents a type characterised by a parameter T (either the type itself, or
|
||||||
|
@ -135,12 +137,22 @@ impl<T> Then<T> for std::option::Option<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
enum Expression {
|
enum Expression {
|
||||||
Literal(Token),
|
Literal(Token),
|
||||||
|
Unary(Token, Box<Expression>),
|
||||||
Binary(Token, Box<Expression>, Box<Expression>),
|
Binary(Token, Box<Expression>, Box<Expression>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for Expression {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
return match self {
|
||||||
|
Expression::Literal(token) => write!(f, "{}", token),
|
||||||
|
Expression::Unary(token, expr) => write!(f, "{}{:?}", token, expr),
|
||||||
|
Expression::Binary(token, left, right) => write!(f, "({} {:?} {:?})", token, left, right),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct Counter {
|
struct Counter {
|
||||||
state: usize,
|
state: usize,
|
||||||
}
|
}
|
||||||
|
@ -168,6 +180,14 @@ impl Expression {
|
||||||
format!("Node{} [label = \"{}\"]", id, i)
|
format!("Node{} [label = \"{}\"]", id, i)
|
||||||
},
|
},
|
||||||
Expression::Literal(_) => unreachable!(),
|
Expression::Literal(_) => unreachable!(),
|
||||||
|
Expression::Unary(op, expr) => {
|
||||||
|
let expr_id = unsafe { GLOBAL_COUNTER.next() };
|
||||||
|
|
||||||
|
format!("Node{} -> Node{}\nNode{} [label = \"{}\"]\n{}",
|
||||||
|
id, expr_id,
|
||||||
|
id, op,
|
||||||
|
expr.create_graphviz_graph(expr_id))
|
||||||
|
},
|
||||||
Expression::Binary(op, left, right) => {
|
Expression::Binary(op, left, right) => {
|
||||||
let left_id = unsafe { GLOBAL_COUNTER.next() };
|
let left_id = unsafe { GLOBAL_COUNTER.next() };
|
||||||
let right_id = unsafe { GLOBAL_COUNTER.next() };
|
let right_id = unsafe { GLOBAL_COUNTER.next() };
|
||||||
|
@ -181,22 +201,57 @@ impl Expression {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_expression<'a>(tokens: &'a mut TokenStream<'a>) -> Option<Expression> {
|
fn unary_precedence(token: Token) -> Option<usize> {
|
||||||
|
return match token {
|
||||||
|
Token::Plus => Some(2),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
fn binary_precedence(token: Token) -> Option<usize> {
|
||||||
|
return match token {
|
||||||
|
Token::Plus => Some(1),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_expression<'a, 'b: 'a>(tokens: &'a mut TokenStream<'b>, highest_precedence: usize) -> Option<Expression> {
|
||||||
let lhs = match tokens.next()? {
|
let lhs = match tokens.next()? {
|
||||||
token @ Token::IntegerLiteral(_) => Expression::Literal(token),
|
token @ Token::IntegerLiteral(_) => Expression::Literal(token),
|
||||||
_ => return None,
|
token => {
|
||||||
};
|
if let Some(precedence) = unary_precedence(token) {
|
||||||
|
let expr = parse_expression(tokens, precedence)?;
|
||||||
return match tokens.next()? {
|
Expression::Unary(token, box expr)
|
||||||
operator @ Token::Plus => {
|
} else {
|
||||||
let rhs = parse_expression(tokens)?;
|
return None;
|
||||||
|
}
|
||||||
Some(Expression::Binary(operator, box lhs, box rhs))
|
|
||||||
},
|
},
|
||||||
// If it's not a valid operator, then caller can get rest of the input in the token stream
|
|
||||||
// it has provided to us.
|
|
||||||
_ => Some(lhs),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let operator = match tokens.peek()? {
|
||||||
|
operator @ Token::Plus => operator,
|
||||||
|
_ => return None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let precedence = binary_precedence(operator)?;
|
||||||
|
|
||||||
|
if precedence <= highest_precedence {
|
||||||
|
return Some(lhs);
|
||||||
|
}
|
||||||
|
|
||||||
|
return match operator {
|
||||||
|
Token::Plus => {
|
||||||
|
tokens.next();
|
||||||
|
let rhs = parse_expression(tokens, precedence)?;
|
||||||
|
|
||||||
|
Some(Expression::Binary(operator, box lhs, box rhs))
|
||||||
|
},
|
||||||
|
// If it's not a valid operator, then caller can get rest of the input in the token stream
|
||||||
|
// it has provided to us.
|
||||||
|
_ => Some(lhs),
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct TokenStream<'a> {
|
struct TokenStream<'a> {
|
||||||
|
@ -259,11 +314,17 @@ impl<'a> TokenStream<'a> {
|
||||||
c => todo!("character unsupported: `{}`", natural_char_representation(c)),
|
c => todo!("character unsupported: `{}`", natural_char_representation(c)),
|
||||||
};
|
};
|
||||||
|
|
||||||
self.cursor += token.len();
|
|
||||||
return Some(token);
|
return Some(token);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next(&mut self) -> Option<Token> {
|
pub fn next(&mut self) -> Option<Token> {
|
||||||
|
self.skip_whitespace();
|
||||||
|
let token = self.parse_next()?;
|
||||||
|
self.cursor += token.len();
|
||||||
|
return Some(token);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn peek(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.skip_whitespace();
|
||||||
return self.parse_next();
|
return self.parse_next();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue