pine/src/main.rs

324 lines
7.6 KiB
Rust

#![feature(box_syntax)]
// Help text printed for `--help`. Keep it in sync with the argument parsing in
// `main`; a stale usage string is confusing for users.
//
// Each line ends in `\n\`: the `\n` is the real line break, and the trailing
// backslash makes the compiler skip the source newline and the indentation of
// the following line, so the text itself carries no leading whitespace.
static USAGE: &str = "usage: pine [options] input\n\
    options:\n\
    --help print all options\n\
    unstable options:\n\
    --unpretty val print un-prettified representation of the source code\n\
    valid options for `val` are:\n\
    dot, graphdotviz (dot-compatible graph)";
/// Program entry point.
///
/// Parses the command line (`--help`, `--unpretty <val>` and exactly one input
/// path), then parses a source snippet and, when `--unpretty dot` or
/// `--unpretty graphdotviz` was given, prints the expression as a Graphviz
/// graph. Every diagnostic goes to stderr and terminates the process with
/// exit status 1.
fn main() {
    // The first argument is usually the executable name, which is of no use here.
    let args: Vec<String> = std::env::args().skip(1).collect();

    // With no arguments there is nothing to parse, so fail fast. Unlike e.g.
    // rustc, the bare binary does not print the full usage text; it reports
    // the missing input the way clang or go do.
    if args.is_empty() {
        eprintln!("pine: \x1b[1;31merror\x1b[0m: no input files");
        std::process::exit(1);
    }

    let mut path: Option<&str> = None;
    let mut output_pretty: Option<&str> = None;

    // Walk the arguments with an explicit iterator so that an option taking a
    // value can pull the following argument itself.
    let mut pending = args.iter().map(String::as_str);
    while let Some(arg) = pending.next() {
        if let Some(option) = arg.strip_prefix("--") {
            match option {
                "help" => {
                    println!("{}\n", USAGE);
                    return;
                }
                "unpretty" => {
                    let value = match pending.next() {
                        Some(value) => value,
                        None => {
                            eprintln!("pine: \x1b[1;31merror\x1b[0m: expected option to '{}'", arg);
                            std::process::exit(1);
                        }
                    };
                    output_pretty = match value {
                        "dot" | "graphdotviz" => Some(value),
                        _ => {
                            eprintln!("pine: \x1b[1;31merror\x1b[0m: invalid option '{}' to '{}'", value, arg);
                            std::process::exit(1);
                        }
                    };
                }
                _ => {
                    eprintln!("pine: \x1b[1;31merror\x1b[0m: unknown argument '{}'", arg);
                    std::process::exit(1);
                }
            }
        } else if arg.starts_with('-') {
            // Single-dash arguments are not supported (yet); reject them outright.
            eprintln!("pine: \x1b[1;31merror\x1b[0m: unknown argument '{}'", arg);
            std::process::exit(1);
        } else if let Some(first) = path {
            eprintln!("pine: \x1b[1;31merror\x1b[0m: multiple file names provided (first two are `{}` and `{}`)", first, arg);
            std::process::exit(1);
        } else {
            // Anything that is not an option is the input file.
            path = Some(arg);
        }
    }

    let path = match path {
        Some(path) => path,
        None => {
            eprintln!("pine: \x1b[1;31merror\x1b[0m: no input files");
            std::process::exit(1);
        }
    };
    eprintln!("compiling `{}`", path);

    // NOTE: the file at `path` is not read; a fixed snippet is parsed instead.
    const SOURCE: &str = "23 + 21;";
    let mut tokens = TokenStream::from(SOURCE);
    let expr = parse_expression(&mut tokens);
    eprintln!("{:?}", expr);

    match output_pretty {
        Some("dot" | "graphdotviz") => expr.then(|e| {
            // SAFETY: `GLOBAL_COUNTER` is only accessed through short-lived
            // calls like this one, and nothing visible in this file spawns a
            // thread, so no other reference to it is live here.
            let root_id = unsafe { GLOBAL_COUNTER.next() };
            let graph = e.create_graphviz_graph(root_id);
            let graphviz_format = "node [shape = box, style = filled, color = \"#bfd1e5\", fontname = monospace, fontsize = 12]";
            eprintln!("digraph {{\n{}\n{}\n}}", graphviz_format, graph);
        }),
        // The value was validated while parsing the command line, where
        // everything else is rejected, so no other string can reach this point.
        Some(_) => unreachable!(),
        None => {}
    }
}
/// Extension trait for running a callback on a value only when one is present.
trait WithContinuation<T> {
    /// Calls `f` with a reference to the contained value, if there is one;
    /// otherwise does nothing.
    fn then<F>(&self, f: F) where F: FnOnce(&T);
}

impl<T> WithContinuation<T> for Option<T> {
    fn then<F>(&self, f: F) where F: FnOnce(&T) {
        if let Some(value) = self {
            f(value);
        }
    }
}
/// A node of the expression tree built by `parse_expression`.
#[derive(Debug)]
enum Expression {
/// A leaf wrapping a single token. `parse_expression` only builds this from
/// `Token::IntegerLiteral`, and `create_graphviz_graph` treats any other
/// token here as unreachable.
Literal(Token),
/// A binary operation: the operator token, then the left and right operands.
Binary(Token, Box<Expression>, Box<Expression>),
}
/// A generator of consecutive ids, starting at zero.
struct Counter {
    /// The id the next call to `next` will hand out.
    state: usize,
}

impl Counter {
    /// Creates a counter whose first id is `0`. Usable in `static` initializers.
    pub const fn new() -> Self {
        Self { state: 0 }
    }

    /// Returns the current id and advances the counter by one.
    pub fn next(&mut self) -> usize {
        let upcoming = self.state + 1;
        // `replace` stores the advanced value and hands back the previous one.
        std::mem::replace(&mut self.state, upcoming)
    }
}
/// Shared id source used to number Graphviz nodes (`main` draws the root id
/// from it, `Expression::create_graphviz_graph` the child ids).
///
/// As a `static mut`, every access has to be wrapped in `unsafe`, and it is up
/// to the callers to make sure no two accesses overlap.
static mut GLOBAL_COUNTER: Counter = Counter::new();
impl Expression {
    /// Renders this expression and, recursively, its operands as Graphviz
    /// `dot` statements, naming this node `Node<id>`.
    ///
    /// Ids for the operands of a binary expression are drawn from
    /// `GLOBAL_COUNTER`. The result contains statements only; the caller
    /// supplies the surrounding `digraph { ... }`.
    ///
    /// # Panics
    ///
    /// Panics if a `Literal` holds a token other than an integer literal.
    pub fn create_graphviz_graph(&self, id: usize) -> String {
        match self {
            Expression::Literal(Token::IntegerLiteral(digits)) => {
                format!("Node{} [label = \"{}\"]", id, digits)
            }
            Expression::Literal(_) => unreachable!(),
            Expression::Binary(operator, lhs, rhs) => {
                // Reserve both child ids before recursing, so the two operands
                // of a node always get consecutive numbers.
                // SAFETY: each access is a single short-lived call and nothing
                // visible in this file touches the counter concurrently.
                let lhs_id = unsafe { GLOBAL_COUNTER.next() };
                let rhs_id = unsafe { GLOBAL_COUNTER.next() };

                let lhs_graph = lhs.create_graphviz_graph(lhs_id);
                let rhs_graph = rhs.create_graphviz_graph(rhs_id);

                format!("Node{} -> {{ Node{} Node{} }}\nNode{} [label = \"{}\"]\n{}\n{}",
                    id, lhs_id, rhs_id,
                    id, operator,
                    lhs_graph, rhs_graph)
            }
        }
    }
}
/// Parses a chain of integer literals joined by `+` from `tokens`.
///
/// The chain is right-associative: `1 + 2 + 3` becomes `1 + (2 + 3)`.
///
/// Returns `None` when the stream is empty, when it does not start with an
/// integer literal, or when a `+` is not followed by another expression. When
/// the operand is followed by the end of input or by a token that is not an
/// operator, the operand on its own is the result.
///
/// The stream is borrowed only for the duration of the call, so the caller can
/// keep reading from it afterwards. Note that the token following the last
/// operand has already been consumed at that point, because the expression
/// has to look at it to know where it ends.
fn parse_expression(tokens: &mut TokenStream<'_>) -> Option<Expression> {
    let lhs = match tokens.next()? {
        token @ Token::IntegerLiteral(_) => Expression::Literal(token),
        _ => return None,
    };

    match tokens.next() {
        Some(operator @ Token::Plus) => {
            let rhs = parse_expression(tokens)?;
            // `Box::new` rather than the unstable `box` expression syntax.
            Some(Expression::Binary(operator, Box::new(lhs), Box::new(rhs)))
        }
        // Either the input ended or the next token is not an operator: the
        // expression is complete and consists of the operand alone.
        _ => Some(lhs),
    }
}
/// A cursor over a source string that splits it into tokens on demand.
struct TokenStream<'a> {
    /// The complete source text being tokenized.
    source: &'a str,
    /// Byte offset into `source` of the first character not consumed yet.
    cursor: usize,
}

impl<'a> TokenStream<'a> {
    /// Creates a stream positioned at the start of `source`.
    pub fn from(source: &'a str) -> Self {
        Self { source, cursor: 0 }
    }

    /// Returns an iterator over the characters that have not been consumed.
    ///
    /// The iterator borrows the source text (lifetime `'a`), not the stream
    /// itself, so the cursor can be advanced while the iterator is alive.
    fn chars(&self) -> std::str::Chars<'a> {
        self.source[self.cursor..].chars()
    }

    /// Advances the cursor past any whitespace at the current position,
    /// including whitespace that runs up to the end of the input.
    pub fn skip_whitespace(&mut self) {
        let skipped: usize = self
            .chars()
            .take_while(|c| c.is_whitespace())
            .map(char::len_utf8)
            .sum();
        self.cursor += skipped;
    }

    /// Lexes the token starting exactly at the cursor and advances past it.
    ///
    /// Returns `None` at the end of the input. Whitespace is not skipped
    /// here; use [`TokenStream::next`] for that.
    ///
    /// # Panics
    ///
    /// Panics (via `todo!`) on any character that does not start a known
    /// token, including whitespace.
    pub fn parse_next(&mut self) -> Option<Token> {
        let mut chars = self.chars();
        let token = match chars.next()? {
            '+' => Token::Plus,
            ';' => Token::Semicolon,
            first if first.is_numeric() => {
                let start = self.cursor;
                // The literal extends over every following numeric character
                // and simply stops at the end of the input.
                let rest: usize = chars
                    .take_while(|c| c.is_numeric())
                    .map(char::len_utf8)
                    .sum();
                let length = first.len_utf8() + rest;
                Token::IntegerLiteral(OffsetStr::from(&self.source[start..start + length]))
            }
            c => todo!("character unsupported: `{}`", natural_char_representation(c)),
        };
        self.cursor += token.len();
        Some(token)
    }

    /// Skips leading whitespace and returns the next token, or `None` once
    /// the input is exhausted.
    pub fn next(&mut self) -> Option<Token> {
        self.skip_whitespace();
        self.parse_next()
    }
}
/// Maps whitespace characters that are invisible when printed (space, tab,
/// newline) to a visible stand-in glyph; every other character is returned
/// unchanged.
fn natural_char_representation(c: char) -> char {
    const VISIBLE_FORMS: [(char, char); 3] = [(' ', '␣'), ('\t', '→'), ('\n', '⏎')];

    VISIBLE_FORMS
        .iter()
        .find(|(raw, _)| *raw == c)
        .map_or(c, |&(_, shown)| shown)
}
/// A lifetime-erased view of a string slice: a start pointer plus a byte length.
///
/// Unlike `&str`, an `OffsetStr` does not borrow from the text it points into,
/// so the compiler cannot stop that text from being freed while the view is
/// still around. Whoever creates an `OffsetStr` must keep the backing text
/// alive for as long as the view may be formatted; formatting a dangling view
/// is undefined behavior.
#[derive(Copy, Clone)]
struct OffsetStr {
    /// Address of the first byte of the slice.
    data: *const u8,
    /// Length of the slice in bytes.
    length: usize,
}

impl OffsetStr {
    /// Records the address and length of `s` without keeping a borrow of it.
    pub fn from(s: &str) -> Self {
        Self {
            data: s.as_ptr(),
            length: s.len(),
        }
    }

    /// Reconstructs the string slice this view points at.
    fn as_str(&self) -> &str {
        // SAFETY: `data` and `length` were taken from a valid `&str` in `from`,
        // so they describe `length` initialized bytes — provided the backing
        // text is still alive, which is the creator's obligation (see the
        // type-level documentation).
        let bytes = unsafe { std::slice::from_raw_parts(self.data, self.length) };
        std::str::from_utf8(bytes).expect("an OffsetStr is only ever built from a valid `&str`")
    }
}

impl std::fmt::Display for OffsetStr {
    /// Writes the referenced text verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}

impl std::fmt::Debug for OffsetStr {
    /// Writes the referenced text wrapped in double quotes (without escaping).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, r#""{}""#, self)
    }
}

/// A lexical token of the source language.
#[derive(Debug, Copy, Clone)]
enum Token {
    /// The `+` operator.
    Plus,
    /// The `;` terminator.
    Semicolon,
    /// An integer literal, stored as a view of its digits in the source text.
    IntegerLiteral(OffsetStr),
}

impl std::fmt::Display for Token {
    /// Writes the token the way it is spelled in source code.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::Plus => f.write_str("+"),
            Token::Semicolon => f.write_str(";"),
            // Format the literal's text. Formatting the token itself here
            // would call this function again and never terminate.
            Token::IntegerLiteral(text) => write!(f, "{}", text),
        }
    }
}

impl Token {
    /// Returns the length of the token's source text in bytes.
    pub fn len(&self) -> usize {
        match self {
            Token::Plus | Token::Semicolon => 1,
            Token::IntegerLiteral(text) => text.length,
        }
    }
}