Add unary operator parsing

Now we can also parse unary +, e.g.
  +17,
as well as expressions that contain it, e.g.
  +17 + 23.

We also now have a custom Debug implementation for Expression, which
prints expressions in a more useful, s-expression-like syntax, e.g.
  +17 + 23 => (+ +17 23).

We also change the implementation of `TokenStream`'s `next` and `parse_next`
methods so that a `peek` method, which looks at the (possible) next token
without advancing the stream, is easy to implement.
This commit is contained in:
Aodhnait Étaín 2021-05-23 00:28:39 +01:00
parent 9539389e4f
commit 3062ac9f45
1 changed files with 78 additions and 17 deletions

View File

@ -91,10 +91,12 @@ fn main() {
let path = path.unwrap(); let path = path.unwrap();
eprintln!("compiling `{}`", path); eprintln!("compiling `{}`", path);
#[allow(non_upper_case_globals)] const source: &'static str = "23 + 21;"; #[allow(non_upper_case_globals)] const source: &'static str = "+17 + 23 + +21;";
let mut tokens = TokenStream::from(source); let mut tokens = TokenStream::from(source);
let expr = parse_expression(&mut tokens); let expr = parse_expression(&mut tokens, 0);
eprintln!("{:?}", expr); eprintln!("{:?}", expr);
// let expr = Some(Expression::Binary(Token::Plus, box Expression::Literal(Token::IntegerLiteral(OffsetStr::from("42"))), box Expression::Unary(Token::Plus, box Expression::Literal(Token::IntegerLiteral(OffsetStr::from("22"))))));
// eprintln!("{:?}", expr);
match output_pretty { match output_pretty {
Some("dot" | "graphdotviz") => expr.then(|e| { Some("dot" | "graphdotviz") => expr.then(|e| {
@ -107,7 +109,7 @@ fn main() {
// a single bit. // a single bit.
Some(_) => unreachable!(), Some(_) => unreachable!(),
None => {}, None => {},
} };
} }
// Represents a type characterised by a parameter T (either the type itself, or // Represents a type characterised by a parameter T (either the type itself, or
@ -135,12 +137,22 @@ impl<T> Then<T> for std::option::Option<T> {
} }
} }
#[derive(Debug)]
enum Expression { enum Expression {
Literal(Token), Literal(Token),
Unary(Token, Box<Expression>),
Binary(Token, Box<Expression>, Box<Expression>), Binary(Token, Box<Expression>, Box<Expression>),
} }
impl std::fmt::Debug for Expression {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
return match self {
Expression::Literal(token) => write!(f, "{}", token),
Expression::Unary(token, expr) => write!(f, "{}{:?}", token, expr),
Expression::Binary(token, left, right) => write!(f, "({} {:?} {:?})", token, left, right),
};
}
}
struct Counter { struct Counter {
state: usize, state: usize,
} }
@ -168,6 +180,14 @@ impl Expression {
format!("Node{} [label = \"{}\"]", id, i) format!("Node{} [label = \"{}\"]", id, i)
}, },
Expression::Literal(_) => unreachable!(), Expression::Literal(_) => unreachable!(),
Expression::Unary(op, expr) => {
let expr_id = unsafe { GLOBAL_COUNTER.next() };
format!("Node{} -> Node{}\nNode{} [label = \"{}\"]\n{}",
id, expr_id,
id, op,
expr.create_graphviz_graph(expr_id))
},
Expression::Binary(op, left, right) => { Expression::Binary(op, left, right) => {
let left_id = unsafe { GLOBAL_COUNTER.next() }; let left_id = unsafe { GLOBAL_COUNTER.next() };
let right_id = unsafe { GLOBAL_COUNTER.next() }; let right_id = unsafe { GLOBAL_COUNTER.next() };
@ -181,22 +201,57 @@ impl Expression {
} }
} }
fn parse_expression<'a>(tokens: &'a mut TokenStream<'a>) -> Option<Expression> { fn unary_precedence(token: Token) -> Option<usize> {
return match token {
Token::Plus => Some(2),
_ => None,
};
}
/// Returns the precedence of `token` when it is used as a binary (infix)
/// operator, or `None` when the token is not a binary operator.
///
/// Only `Token::Plus` is recognised, with precedence 1.
fn binary_precedence(token: Token) -> Option<usize> {
    if let Token::Plus = token {
        Some(1)
    } else {
        None
    }
}
fn parse_expression<'a, 'b: 'a>(tokens: &'a mut TokenStream<'b>, highest_precedence: usize) -> Option<Expression> {
let lhs = match tokens.next()? { let lhs = match tokens.next()? {
token @ Token::IntegerLiteral(_) => Expression::Literal(token), token @ Token::IntegerLiteral(_) => Expression::Literal(token),
_ => return None, token => {
}; if let Some(precedence) = unary_precedence(token) {
let expr = parse_expression(tokens, precedence)?;
return match tokens.next()? { Expression::Unary(token, box expr)
operator @ Token::Plus => { } else {
let rhs = parse_expression(tokens)?; return None;
}
Some(Expression::Binary(operator, box lhs, box rhs))
}, },
// If it's not a valid operator, then caller can get rest of the input in the token stream
// it has provided to us.
_ => Some(lhs),
}; };
loop {
let operator = match tokens.peek()? {
operator @ Token::Plus => operator,
_ => return None,
};
let precedence = binary_precedence(operator)?;
if precedence <= highest_precedence {
return Some(lhs);
}
return match operator {
Token::Plus => {
tokens.next();
let rhs = parse_expression(tokens, precedence)?;
Some(Expression::Binary(operator, box lhs, box rhs))
},
// If it's not a valid operator, then caller can get rest of the input in the token stream
// it has provided to us.
_ => Some(lhs),
};
}
} }
struct TokenStream<'a> { struct TokenStream<'a> {
@ -259,11 +314,17 @@ impl<'a> TokenStream<'a> {
c => todo!("character unsupported: `{}`", natural_char_representation(c)), c => todo!("character unsupported: `{}`", natural_char_representation(c)),
}; };
self.cursor += token.len();
return Some(token); return Some(token);
} }
pub fn next(&mut self) -> Option<Token> { pub fn next(&mut self) -> Option<Token> {
self.skip_whitespace();
let token = self.parse_next()?;
self.cursor += token.len();
return Some(token);
}
pub fn peek(&mut self) -> Option<Token> {
self.skip_whitespace(); self.skip_whitespace();
return self.parse_next(); return self.parse_next();
} }