417 lines
10 KiB
Rust
417 lines
10 KiB
Rust
#![feature(box_syntax)]
|
|
|
|
// Help text printed by `main` when `--help` is passed.
//
// Try to keep this string updated with the argument parsing, otherwise it will
// get confusing for users.
static USAGE: &'static str = "usage: pine [options] input

options:
--help print all options

unstable options:
--unpretty val print un-prettified representation of the source code
valid options for `val` are:
dot, graphdotviz (dot-compatible graph)";
|
|
|
|
fn main() {
|
|
// Throw away the first argument, which usually is the executable name.
|
|
let args = std::env::args().skip(1).collect::<Vec<_>>();
|
|
|
|
// If there is no arguments, we short circuit to avoid having to perform the
|
|
// expensive command-line argument generation and parsing step. We can allow
|
|
// ourselves to do this since, unlike i.e. rustc, we don't print full usage
|
|
// information on invocation of only the binary, but instead we behave more
|
|
// like clang or go.
|
|
if args.len() == 0 {
|
|
eprintln!("pine: \x1b[1;31merror\x1b[0m: no input files");
|
|
std::process::exit(1);
|
|
}
|
|
|
|
let mut path: Option<&str> = None;
|
|
let mut output_pretty: Option<&str> = None;
|
|
|
|
// Handle command-line arguments.
|
|
let mut i = 0;
|
|
loop {
|
|
if i == args.len() {
|
|
break;
|
|
}
|
|
|
|
let arg = args[i].as_str();
|
|
|
|
if arg.starts_with("--") {
|
|
match &arg[2..] {
|
|
"help" => {
|
|
println!("{}\n", USAGE);
|
|
return;
|
|
},
|
|
"unpretty" => {
|
|
if i + 1 == args.len() {
|
|
eprintln!("pine: \x1b[1;31merror\x1b[0m: expected option to '{}'", arg);
|
|
std::process::exit(1);
|
|
}
|
|
|
|
output_pretty = match args[i + 1].as_str() {
|
|
opt @ ("dot" | "graphdotviz") => Some(opt),
|
|
opt => {
|
|
eprintln!("pine: \x1b[1;31merror\x1b[0m: invalid option '{}' to '{}'", opt, arg);
|
|
std::process::exit(1);
|
|
},
|
|
};
|
|
|
|
i += 1;
|
|
},
|
|
_ => {
|
|
eprintln!("pine: \x1b[1;31merror\x1b[0m: unknown argument '{}'", arg);
|
|
std::process::exit(1);
|
|
},
|
|
};
|
|
} else if arg.starts_with("-") {
|
|
// We don't handle arguments that start with a single dash, this might be
|
|
// added later. For now we just exit with an error.
|
|
eprintln!("pine: \x1b[1;31merror\x1b[0m: unknown argument '{}'", arg);
|
|
std::process::exit(1);
|
|
} else {
|
|
if path.is_some() {
|
|
eprintln!("pine: \x1b[1;31merror\x1b[0m: multiple file names provided (first two are `{}` and `{}`)", path.unwrap(), arg);
|
|
std::process::exit(1);
|
|
}
|
|
|
|
// Use this argument as an input file.
|
|
path = Some(arg);
|
|
}
|
|
|
|
i += 1;
|
|
}
|
|
|
|
if path.is_none() {
|
|
eprintln!("pine: \x1b[1;31merror\x1b[0m: no input files");
|
|
std::process::exit(1);
|
|
}
|
|
|
|
let path = path.unwrap();
|
|
eprintln!("compiling `{}`", path);
|
|
|
|
// #[allow(non_upper_case_globals)] const source: &'static str = "+17 + 23 + +21 + 11;";
|
|
#[allow(non_upper_case_globals)] const source: &'static str = "11 + 13 * 17 + 19;";
|
|
let mut tokens = TokenStream::from(source);
|
|
let expr = parse_expression(&mut tokens, 0);
|
|
eprintln!("{:?}", expr);
|
|
|
|
match output_pretty {
|
|
Some("dot" | "graphdotviz") => expr.then(|e| {
|
|
let graph = e.create_graphviz_graph(unsafe { GRAPHVIZ_NODE_COUNTER.next() });
|
|
let graphviz_format = "node [shape = box, style = filled, color = \"#bfd1e5\", fontname = monospace, fontsize = 12]";
|
|
eprintln!("digraph {{\n{}\n{}\n}}", graphviz_format, graph);
|
|
}),
|
|
// This case is validated at the command-line parsing time, and we reject everything
|
|
// not specified there. This is why this can never happen, unless a solar flare changes
|
|
// a single bit.
|
|
Some(_) => unreachable!(),
|
|
None => {},
|
|
};
|
|
}
|
|
|
|
// A container parameterised by T (the value itself, or a value held inside it
// as with Option<T>) that can run a side-effecting procedure on that value.
//
// This resembles the `and_then` method of Option and Result, except that
// nothing is returned: neither the original value nor a result of the
// procedure. It is therefore only useful for side effects.
//
// The method is named after `then` on bool: the procedure runs when there is a
// value and nothing happens otherwise.
trait Then<T> {
    fn then<F>(&self, f: F) where F: FnOnce(&T);
}

// For Option, the procedure is invoked with a reference to the contained value
// when it is Some, and skipped when it is None.
impl<T> Then<T> for std::option::Option<T> {
    fn then<F>(&self, f: F) where F: FnOnce(&T) {
        if let Some(value) = self {
            f(value);
        }
    }
}
|
|
|
|
// Node of the expression tree built by `parse_expression`.
enum Expression {
    // A single operand token; the parser only creates this from integer
    // literals.
    Literal(Token),
    // A prefix operator token and the expression it applies to.
    Unary(Token, Box<Expression>),
    // An infix operator token followed by its left and right operands.
    Binary(Token, Box<Expression>, Box<Expression>),
}
|
|
|
|
impl std::fmt::Debug for Expression {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
return match self {
|
|
Expression::Literal(token) => write!(f, "{}", token),
|
|
Expression::Unary(token, expr) => write!(f, "{}{:?}", token, expr),
|
|
Expression::Binary(token, left, right) => write!(f, "({} {:?} {:?})", token, left, right),
|
|
};
|
|
}
|
|
}
|
|
|
|
// Hands out consecutive integers, starting from zero.
struct Counter {
    // The value the next call to `next` will return.
    state: usize,
}

impl Counter {
    // Creates a counter whose first value is 0. Usable in constant contexts.
    pub const fn new() -> Self {
        Self { state: 0 }
    }

    // Returns the current value and advances the counter by one.
    pub fn next(&mut self) -> usize {
        let current = self.state;
        self.state = current + 1;
        current
    }
}
|
|
|
|
// Used for numbering nodes in GraphViz printer.
//
// NOTE(review): this is a `static mut`, so every access in this file is wrapped
// in `unsafe`. That is only sound while nothing else accesses the counter at the
// same time — confirm before introducing threads.
static mut GRAPHVIZ_NODE_COUNTER: Counter = Counter::new();
|
|
|
|
impl Expression {
|
|
pub fn create_graphviz_graph(&self, id: usize) -> String {
|
|
return match self {
|
|
Expression::Literal(Token::IntegerLiteral(i)) => {
|
|
format!("Node{} [label = \"{}\"]", id, i)
|
|
},
|
|
Expression::Literal(_) => unreachable!(),
|
|
Expression::Unary(op, expr) => {
|
|
let expr_id = unsafe { GRAPHVIZ_NODE_COUNTER.next() };
|
|
|
|
format!("Node{} -> Node{}\nNode{} [label = \"{}\"]\n{}",
|
|
id, expr_id,
|
|
id, op,
|
|
expr.create_graphviz_graph(expr_id))
|
|
},
|
|
Expression::Binary(op, left, right) => {
|
|
let left_id = unsafe { GRAPHVIZ_NODE_COUNTER.next() };
|
|
let right_id = unsafe { GRAPHVIZ_NODE_COUNTER.next() };
|
|
|
|
format!("Node{} -> {{ Node{} Node{} }}\nNode{} [label = \"{}\"]\n{}\n{}",
|
|
id, left_id, right_id,
|
|
id, op,
|
|
left.create_graphviz_graph(left_id), right.create_graphviz_graph(right_id))
|
|
},
|
|
};
|
|
}
|
|
}
|
|
|
|
fn unary_precedence(token: Token) -> Option<usize> {
|
|
return match token {
|
|
Token::Plus => Some(3),
|
|
_ => None,
|
|
};
|
|
}
|
|
|
|
fn binary_precedence(token: Token) -> Option<usize> {
|
|
return match token {
|
|
Token::Asterisk => Some(2),
|
|
Token::Plus => Some(1),
|
|
_ => None,
|
|
};
|
|
}
|
|
|
|
fn is_binary_operator(token: Token) -> bool {
|
|
return match token {
|
|
Token::Plus | Token::Asterisk => true,
|
|
_ => false,
|
|
};
|
|
}
|
|
|
|
fn parse_expression<'a, 'b: 'a>(tokens: &'a mut TokenStream<'b>, highest_precedence: usize) -> Option<Expression> {
|
|
let mut lhs = match tokens.next()? {
|
|
token @ Token::IntegerLiteral(_) => Expression::Literal(token),
|
|
token => {
|
|
if let Some(precedence) = unary_precedence(token) {
|
|
let expr = parse_expression(tokens, precedence)?;
|
|
Expression::Unary(token, box expr)
|
|
} else {
|
|
return None;
|
|
}
|
|
},
|
|
};
|
|
|
|
loop {
|
|
let operator = match tokens.peek()? {
|
|
operator if is_binary_operator(operator) => operator,
|
|
_ => return Some(lhs),
|
|
};
|
|
|
|
let precedence = binary_precedence(operator)?;
|
|
|
|
if precedence <= highest_precedence {
|
|
return Some(lhs);
|
|
}
|
|
|
|
if is_binary_operator(operator) {
|
|
tokens.next();
|
|
let rhs = parse_expression(tokens, precedence)?;
|
|
lhs = Expression::Binary(operator, box lhs, box rhs);
|
|
|
|
if tokens.peek().map(is_binary_operator).unwrap_or(false) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
return Some(lhs);
|
|
}
|
|
}
|
|
|
|
// Produces tokens from a source string on demand, with one token of lookahead.
struct TokenStream<'a> {
    // The complete text being tokenised.
    source: &'a str,
    // Byte offset into `source` of the first character not consumed yet.
    cursor: usize,
    // Token stored by `peek` so that the following `next` can hand it out
    // without parsing it again.
    last: Option<Token>,
}
|
|
|
|
impl<'a> TokenStream<'a> {
|
|
pub fn from(source: &'a str) -> Self {
|
|
return Self {
|
|
source,
|
|
cursor: 0,
|
|
last: None,
|
|
};
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn chars(&'a self) -> std::str::Chars<'a> {
|
|
return self.source[self.cursor..].chars();
|
|
}
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
let mut chars = self.chars().peekable();
|
|
let mut length = 0;
|
|
|
|
loop {
|
|
match match chars.peek() {
|
|
None => return,
|
|
Some(c) => c
|
|
} {
|
|
c if c.is_whitespace() => {
|
|
length += c.len_utf8();
|
|
chars.next();
|
|
},
|
|
_ => break,
|
|
};
|
|
};
|
|
|
|
self.cursor += length;
|
|
}
|
|
|
|
pub fn parse_next(&mut self) -> Option<Token> {
|
|
let mut chars = self.chars();
|
|
|
|
let token = match chars.next()? {
|
|
'*' => Token::Asterisk,
|
|
'+' => Token::Plus,
|
|
';' => Token::Semicolon,
|
|
c if c.is_numeric() => {
|
|
let start = self.cursor;
|
|
let mut length = c.len_utf8();
|
|
|
|
loop {
|
|
match chars.next()? {
|
|
c if c.is_numeric() => length += c.len_utf8(),
|
|
_ => break,
|
|
};
|
|
};
|
|
|
|
Token::IntegerLiteral(OffsetStr::from(&self.source[start..start + length]))
|
|
},
|
|
c => todo!("character unsupported: `{}`", natural_char_representation(c)),
|
|
};
|
|
|
|
return Some(token);
|
|
}
|
|
|
|
pub fn next(&mut self) -> Option<Token> {
|
|
let token = match self.last {
|
|
Some(_) => std::mem::take(&mut self.last).unwrap(),
|
|
None => {
|
|
self.skip_whitespace();
|
|
self.parse_next()?
|
|
},
|
|
};
|
|
|
|
self.cursor += token.len();
|
|
return Some(token);
|
|
}
|
|
|
|
pub fn peek(&mut self) -> Option<Token> {
|
|
self.skip_whitespace();
|
|
self.last = Some(self.parse_next()?);
|
|
return self.last;
|
|
}
|
|
}
|
|
|
|
// Replaces whitespace characters that are hard to see in a message with a
// visible symbol; every other character is returned unchanged.
fn natural_char_representation(c: char) -> char {
    const VISIBLE_FORMS: [(char, char); 3] = [(' ', '␣'), ('\t', '→'), ('\n', '⏎')];

    VISIBLE_FORMS
        .iter()
        .find(|(invisible, _)| *invisible == c)
        .map_or(c, |&(_, visible)| visible)
}
|
|
|
|
// A copyable view of a string slice, stored as a raw pointer plus a byte
// length instead of a borrowed `&str`.
//
// NOTE(review): nothing ties this value to the lifetime of the string it was
// created from; it must not be formatted after that string is gone.
#[derive(Copy, Clone)]
struct OffsetStr {
    // Address of the first byte of the viewed text.
    data: *const u8,
    // Length of the viewed text in bytes.
    length: usize,
}

impl OffsetStr {
    // Captures the address and byte length of `s`.
    pub fn from(s: &str) -> Self {
        Self {
            data: s.as_ptr(),
            length: s.len(),
        }
    }
}

// Writes the viewed text verbatim.
impl std::fmt::Display for OffsetStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // SAFETY: `data` and `length` are only ever set by `from`, which takes
        // them from a valid `&str`. This assumes that string is still alive —
        // the type system does not enforce it.
        let bytes = unsafe { std::slice::from_raw_parts(self.data, self.length) };
        let text = std::str::from_utf8(bytes).unwrap();

        write!(f, "{}", text)
    }
}

// Writes the viewed text wrapped in double quotes.
impl std::fmt::Debug for OffsetStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "\"{}\"", self)
    }
}
|
|
|
|
// Lexical tokens produced by `TokenStream`.
#[derive(Debug, Copy, Clone)]
enum Token {
    // The `+` character.
    Plus,
    // The `*` character.
    Asterisk,
    // The `;` character.
    Semicolon,
    // A run of numeric characters, kept as a view into the source text.
    IntegerLiteral(OffsetStr),
}
|
|
|
|
impl std::fmt::Display for Token {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
return match self {
|
|
Token::IntegerLiteral(s) => write!(f, "{}", s),
|
|
token => write!(f, "{}", match token {
|
|
Token::Plus => "+",
|
|
Token::Asterisk => "*",
|
|
Token::Semicolon => ";",
|
|
_ => unreachable!(),
|
|
}),
|
|
};
|
|
}
|
|
}
|
|
|
|
impl Token {
|
|
pub fn len(&self) -> usize {
|
|
return match self {
|
|
Token::Plus | Token::Asterisk | Token::Semicolon => 1,
|
|
Token::IntegerLiteral(i) => i.length,
|
|
};
|
|
}
|
|
}
|