pine/src/main.rs

417 lines
10 KiB
Rust

#![feature(box_syntax)]
// Help text printed for `--help`. Keep it in sync with the argument parsing in
// `main`; a usage message that disagrees with the parser confuses users.
static USAGE: &str = "usage: pine [options] input
options:
--help print all options
unstable options:
--unpretty val print un-prettified representation of the source code
valid options for `val` are:
dot, graphdotviz (dot-compatible graph)";
// Entry point: parses the command line, then lexes and parses a (currently
// hard-coded) sample expression and optionally dumps it as a GraphViz graph.
//
// Every diagnostic goes to stderr and every rejected invocation exits with
// status 1; `--help` prints the usage text to stdout and exits successfully.
fn main() {
    // The first argument is usually the executable name, so it is dropped.
    let args: Vec<String> = std::env::args().skip(1).collect();

    // With no arguments at all there is nothing to parse. Unlike rustc, which
    // prints its full usage text in that situation, we behave like clang or go
    // and just report the missing input.
    if args.is_empty() {
        eprintln!("pine: \x1b[1;31merror\x1b[0m: no input files");
        std::process::exit(1);
    }

    let mut path: Option<&str> = None;
    let mut output_pretty: Option<&str> = None;

    // Walk the arguments with an explicit iterator so that an option can pull
    // its value out of the same stream.
    let mut remaining = args.iter().map(String::as_str);
    while let Some(arg) = remaining.next() {
        if let Some(option) = arg.strip_prefix("--") {
            match option {
                "help" => {
                    println!("{}\n", USAGE);
                    return;
                },
                "unpretty" => {
                    // The value is the argument immediately after the flag.
                    output_pretty = match remaining.next() {
                        None => {
                            eprintln!("pine: \x1b[1;31merror\x1b[0m: expected option to '{}'", arg);
                            std::process::exit(1);
                        },
                        Some(value @ ("dot" | "graphdotviz")) => Some(value),
                        Some(value) => {
                            eprintln!("pine: \x1b[1;31merror\x1b[0m: invalid option '{}' to '{}'", value, arg);
                            std::process::exit(1);
                        },
                    };
                },
                _ => {
                    eprintln!("pine: \x1b[1;31merror\x1b[0m: unknown argument '{}'", arg);
                    std::process::exit(1);
                },
            }
        } else if arg.starts_with('-') {
            // Single-dash options are not supported (yet); reject them.
            eprintln!("pine: \x1b[1;31merror\x1b[0m: unknown argument '{}'", arg);
            std::process::exit(1);
        } else if let Some(first) = path {
            eprintln!("pine: \x1b[1;31merror\x1b[0m: multiple file names provided (first two are `{}` and `{}`)", first, arg);
            std::process::exit(1);
        } else {
            // A bare argument is the input file.
            path = Some(arg);
        }
    }

    let path = match path {
        Some(path) => path,
        None => {
            eprintln!("pine: \x1b[1;31merror\x1b[0m: no input files");
            std::process::exit(1);
        },
    };
    eprintln!("compiling `{}`", path);

    // The input file is not read yet; a fixed sample expression is compiled
    // instead. Alternative sample: "+17 + 23 + +21 + 11;"
    #[allow(non_upper_case_globals)] const source: &'static str = "11 + 13 * 17 + 19;";
    let mut tokens = TokenStream::from(source);
    let expr = parse_expression(&mut tokens, 0);
    eprintln!("{:?}", expr);

    match output_pretty {
        None => {},
        Some("dot" | "graphdotviz") => expr.then(|root| {
            let root_id = unsafe { GRAPHVIZ_NODE_COUNTER.next() };
            let graph = root.create_graphviz_graph(root_id);
            let graphviz_format = "node [shape = box, style = filled, color = \"#bfd1e5\", fontname = monospace, fontsize = 12]";
            eprintln!("digraph {{\n{}\n{}\n}}", graphviz_format, graph);
        }),
        // The argument loop above only ever stores one of the two accepted
        // values, so no other string can reach this point.
        Some(_) => unreachable!(),
    };
}
// A container of `T` (or `T` itself) on whose value a side-effecting procedure
// can be run.
//
// This resembles `and_then` on Option and Result, except that nothing is
// returned: neither the original value nor a result of the procedure. It is
// therefore only useful for procedures executed for their side effects.
//
// The method is named after `then` on bool: call the procedure when there is a
// value, do nothing otherwise.
trait Then<T> {
    fn then<F>(&self, f: F) where F: FnOnce(&T);
}

// For Option the procedure runs on the contained value if it is `Some` and is
// never called for `None`.
impl<T> Then<T> for std::option::Option<T> {
    fn then<F>(&self, f: F) where F: FnOnce(&T) {
        if let Some(value) = self {
            f(value);
        }
    }
}
// Node of the expression tree produced by `parse_expression`.
enum Expression {
// A single token used as a value; the parser only stores integer literals here.
Literal(Token),
// Prefix operator token followed by its operand.
Unary(Token, Box<Expression>),
// Infix operator token followed by its left and right operands.
Binary(Token, Box<Expression>, Box<Expression>),
}
// Compact prefix rendering of the tree: a literal prints as its token, a unary
// node as the operator directly followed by its operand, and a binary node as
// `(op left right)`.
impl std::fmt::Debug for Expression {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Expression::Literal(value) => write!(f, "{}", value),
            Expression::Unary(operator, operand) => write!(f, "{}{:?}", operator, operand),
            Expression::Binary(operator, lhs, rhs) => write!(f, "({} {:?} {:?})", operator, lhs, rhs),
        }
    }
}
// Hands out consecutive integers, starting from zero.
struct Counter {
    // The value the next call to `next` will return.
    state: usize,
}

impl Counter {
    // Creates a counter whose first issued value is 0. `const` so that it can
    // initialise a static.
    pub const fn new() -> Self {
        Self { state: 0 }
    }

    // Returns the current value and advances the counter by one.
    pub fn next(&mut self) -> usize {
        let issued = self.state;
        self.state += 1;
        issued
    }
}
// Used for numbering nodes in GraphViz printer.
//
// Being a `static mut`, every access has to be wrapped in `unsafe`.
// NOTE(review): this is only sound while the counter is touched from a single
// thread -- confirm before introducing any concurrency.
static mut GRAPHVIZ_NODE_COUNTER: Counter = Counter::new();
impl Expression {
    // Renders this subtree as GraphViz (dot) statements, one per line, with
    // this node named `Node<id>`.
    //
    // Ids for child nodes are drawn from GRAPHVIZ_NODE_COUNTER; all children
    // of a node get their ids before any of them is rendered. Panics
    // (`unreachable!`) on a literal node that holds anything other than an
    // integer literal.
    pub fn create_graphviz_graph(&self, id: usize) -> String {
        match self {
            Expression::Literal(Token::IntegerLiteral(digits)) => {
                format!("Node{} [label = \"{}\"]", id, digits)
            },
            Expression::Literal(_) => unreachable!(),
            Expression::Unary(operator, operand) => {
                let operand_id = unsafe { GRAPHVIZ_NODE_COUNTER.next() };
                let operand_graph = operand.create_graphviz_graph(operand_id);
                format!("Node{} -> Node{}\nNode{} [label = \"{}\"]\n{}",
                    id, operand_id,
                    id, operator,
                    operand_graph)
            },
            Expression::Binary(operator, lhs, rhs) => {
                // Allocate both ids up front so that siblings are numbered
                // consecutively, then render left before right.
                let lhs_id = unsafe { GRAPHVIZ_NODE_COUNTER.next() };
                let rhs_id = unsafe { GRAPHVIZ_NODE_COUNTER.next() };
                let lhs_graph = lhs.create_graphviz_graph(lhs_id);
                let rhs_graph = rhs.create_graphviz_graph(rhs_id);
                format!("Node{} -> {{ Node{} Node{} }}\nNode{} [label = \"{}\"]\n{}\n{}",
                    id, lhs_id, rhs_id,
                    id, operator,
                    lhs_graph, rhs_graph)
            },
        }
    }
}
// Binding power of `token` when used as a prefix operator, or None if it is
// not a prefix operator. Only `+` is one at the moment.
fn unary_precedence(token: Token) -> Option<usize> {
    if let Token::Plus = token {
        Some(3)
    } else {
        None
    }
}
// Binding power of `token` when used as an infix operator, or None if it is
// not an infix operator. `*` binds tighter than `+`.
fn binary_precedence(token: Token) -> Option<usize> {
    let level = match token {
        Token::Plus => 1,
        Token::Asterisk => 2,
        _ => return None,
    };
    Some(level)
}
// Tells whether `token` can appear between two operands.
fn is_binary_operator(token: Token) -> bool {
    matches!(token, Token::Plus | Token::Asterisk)
}
// Parses one expression from `tokens` using precedence climbing.
//
// `highest_precedence` is the binding power of the operator to the left of the
// expression being parsed: only infix operators that bind strictly tighter are
// folded into the result, anything else is left in the stream for the caller.
// Pass 0 to parse a complete expression.
//
// Returns None when the stream does not start with an operand (an integer
// literal or a prefix operator followed by an operand), when the right-hand
// side of an infix operator is missing, or when an infix operator has no
// precedence assigned. Running out of tokens *after* a complete operand ends
// the expression and is not an error.
fn parse_expression<'a, 'b: 'a>(tokens: &'a mut TokenStream<'b>, highest_precedence: usize) -> Option<Expression> {
    let mut lhs = match tokens.next()? {
        token @ Token::IntegerLiteral(_) => Expression::Literal(token),
        token => {
            // Anything else must be a prefix operator applied to an operand.
            let precedence = unary_precedence(token)?;
            let operand = parse_expression(tokens, precedence)?;
            Expression::Unary(token, Box::new(operand))
        },
    };
    loop {
        // End of input or a token that is not an infix operator terminates the
        // expression; what has been parsed so far is the result.
        let operator = match tokens.peek() {
            Some(operator) if is_binary_operator(operator) => operator,
            _ => return Some(lhs),
        };
        let precedence = binary_precedence(operator)?;
        if precedence <= highest_precedence {
            // The operator belongs to an enclosing call; leave it unconsumed.
            return Some(lhs);
        }
        // Consume the operator that was only peeked at so far.
        tokens.next();
        let rhs = parse_expression(tokens, precedence)?;
        lhs = Expression::Binary(operator, Box::new(lhs), Box::new(rhs));
    }
}
// Lexer over a borrowed source string with one token of lookahead.
struct TokenStream<'a> {
// The complete text being tokenized.
source: &'a str,
// Byte offset into `source` of the first character not yet consumed.
cursor: usize,
// Token stored by `peek`, handed out and cleared by the following `next`.
last: Option<Token>,
}
impl<'a> TokenStream<'a> {
    // Creates a stream positioned at the start of `source`.
    pub fn from(source: &'a str) -> Self {
        Self {
            source,
            cursor: 0,
            last: None,
        }
    }

    // Characters of the not yet consumed part of the source. The iterator
    // borrows the source text, not the stream, so the stream stays usable
    // while it is alive.
    fn chars(&self) -> std::str::Chars<'a> {
        self.source[self.cursor..].chars()
    }

    // Advances the cursor past any whitespace at the current position,
    // including whitespace that runs up to the end of the input.
    pub fn skip_whitespace(&mut self) {
        let skipped: usize = self
            .chars()
            .take_while(|c| c.is_whitespace())
            .map(char::len_utf8)
            .sum();
        self.cursor += skipped;
    }

    // Lexes the token starting exactly at the cursor without moving the
    // cursor. Returns None at the end of the input.
    //
    // Panics (`todo!`) on any character that does not start a known token,
    // which includes whitespace: callers skip it first.
    pub fn parse_next(&mut self) -> Option<Token> {
        let mut chars = self.chars();
        let token = match chars.next()? {
            '*' => Token::Asterisk,
            '+' => Token::Plus,
            ';' => Token::Semicolon,
            c if c.is_numeric() => {
                let start = self.cursor;
                // The literal extends over every following numeric character;
                // reaching the end of the input simply ends the literal.
                let length = c.len_utf8()
                    + chars
                        .take_while(|c| c.is_numeric())
                        .map(char::len_utf8)
                        .sum::<usize>();
                Token::IntegerLiteral(OffsetStr::from(&self.source[start..start + length]))
            },
            c => todo!("character unsupported: `{}`", natural_char_representation(c)),
        };
        Some(token)
    }

    // Consumes and returns the next token, or None at the end of the input.
    pub fn next(&mut self) -> Option<Token> {
        let token = match self.last.take() {
            // `peek` already skipped the whitespace and lexed this token.
            Some(token) => token,
            None => {
                self.skip_whitespace();
                self.parse_next()?
            },
        };
        self.cursor += token.len();
        Some(token)
    }

    // Returns the next token without consuming it, or None at the end of the
    // input. The token is remembered so that the following `next` (or another
    // `peek`) does not have to lex it again.
    pub fn peek(&mut self) -> Option<Token> {
        if self.last.is_none() {
            self.skip_whitespace();
            self.last = self.parse_next();
        }
        self.last
    }
}
// Maps invisible characters to a visible stand-in for use in diagnostics:
// space, tab and newline get a symbol, every other character is returned
// unchanged.
fn natural_char_representation(c: char) -> char {
    const VISIBLE: [(char, char); 3] = [(' ', '␣'), ('\t', '→'), ('\n', '⏎')];
    VISIBLE
        .iter()
        .find(|pair| pair.0 == c)
        .map_or(c, |pair| pair.1)
}
// A `Copy` view of a string slice, stored as a raw pointer and a byte length.
//
// The type carries no lifetime, so nothing stops a value from outliving the
// string it was created from. It must only be formatted while that string is
// still alive.
#[derive(Copy, Clone)]
struct OffsetStr {
    // Address of the first byte of the viewed slice.
    data: *const u8,
    // Length of the viewed slice in bytes.
    length: usize,
}

impl OffsetStr {
    // Captures the location and length of `s` without borrowing it.
    pub fn from(s: &str) -> Self {
        Self {
            data: s.as_ptr(),
            length: s.len(),
        }
    }
}

// Prints the viewed text verbatim.
impl std::fmt::Display for OffsetStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // SAFETY: `data` and `length` were taken from a `&str` in `from`, so
        // they describe `length` initialised bytes -- provided that string has
        // not been dropped or moved since, which the caller has to guarantee.
        let bytes = unsafe { std::slice::from_raw_parts(self.data, self.length) };
        let text = std::str::from_utf8(bytes).unwrap();
        f.write_str(text)
    }
}

// Prints the viewed text wrapped in double quotes (without escaping).
impl std::fmt::Debug for OffsetStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "\"{}\"", self)
    }
}
// Lexical token produced by `TokenStream`.
#[derive(Debug, Copy, Clone)]
enum Token {
// The `+` character.
Plus,
// The `*` character.
Asterisk,
// The `;` character.
Semicolon,
// A run of numeric characters; the payload views the literal's text.
IntegerLiteral(OffsetStr),
}
// Prints a token the way it is spelled in source code: the literal's text for
// an integer literal, the operator or punctuation character otherwise.
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            Token::IntegerLiteral(text) => return write!(f, "{}", text),
            Token::Plus => "+",
            Token::Asterisk => "*",
            Token::Semicolon => ";",
        };
        write!(f, "{}", symbol)
    }
}
impl Token {
    // Number of bytes the token occupies in the source text.
    pub fn len(&self) -> usize {
        match *self {
            Token::IntegerLiteral(text) => text.length,
            Token::Plus | Token::Asterisk | Token::Semicolon => 1,
        }
    }
}