// Try to keep this string updated with the argument parsing, otherwise it will
// get confusing for users.
static USAGE: &str = "usage: pine [options] input

options:
    --help              print all options

unstable options:
    --unpretty val      print un-prettified representation of the source code
                        valid options for `val` are:
                            dot, graphdotviz (dot-compatible graph)";

/// Prints `message` as a `pine` error diagnostic on stderr and terminates the
/// process with exit status 1.
fn fatal(message: &str) -> ! {
    eprintln!("pine: \x1b[1;31merror\x1b[0m: {}", message);
    std::process::exit(1)
}

/// Entry point: parses the command line, then parses a hard-coded source
/// snippet and optionally dumps it as a Graphviz graph on stderr.
fn main() {
    // Throw away the first argument, which usually is the executable name.
    let args = std::env::args().skip(1).collect::<Vec<String>>();

    // If there are no arguments, we short circuit to avoid having to perform
    // the command-line argument parsing step. We can allow ourselves to do
    // this since, unlike e.g. rustc, we don't print full usage information on
    // invocation of only the binary, but instead we behave more like clang or
    // go.
    if args.is_empty() {
        fatal("no input files");
    }

    let mut path: Option<&str> = None;
    let mut output_pretty: Option<&str> = None;

    // Handle command-line arguments.
    let mut remaining = args.iter().map(String::as_str);
    while let Some(arg) = remaining.next() {
        if let Some(name) = arg.strip_prefix("--") {
            match name {
                "help" => {
                    println!("{}\n", USAGE);
                    return;
                }
                "unpretty" => {
                    // The value of the option is the argument that follows it.
                    output_pretty = match remaining.next() {
                        None => fatal(&format!("expected option to '{}'", arg)),
                        Some(opt @ ("dot" | "graphdotviz")) => Some(opt),
                        Some(opt) => {
                            fatal(&format!("invalid option '{}' to '{}'", opt, arg))
                        }
                    };
                }
                _ => fatal(&format!("unknown argument '{}'", arg)),
            }
        } else if arg.starts_with('-') {
            // We don't handle arguments that start with a single dash, this
            // might be added later. For now we just exit with an error.
            fatal(&format!("unknown argument '{}'", arg));
        } else if let Some(first) = path {
            fatal(&format!(
                "multiple file names provided (first two are `{}` and `{}`)",
                first, arg
            ));
        } else {
            // Use this argument as an input file.
            path = Some(arg);
        }
    }

    let path = match path {
        Some(path) => path,
        None => fatal("no input files"),
    };

    eprintln!("compiling `{}`", path);

    // NOTE: the input file is not read yet; a fixed snippet is compiled.
    const SOURCE: &str = "23 + 21;";

    let mut tokens = TokenStream::from(SOURCE);
    let expr = parse_expression(&mut tokens);
    eprintln!("{:?}", expr);

    match output_pretty {
        Some("dot" | "graphdotviz") => expr.then(|e| {
            let graph = e.create_graphviz_graph(GLOBAL_COUNTER.next());
            let graphviz_format = "node [shape = box, style = filled, color = \"#bfd1e5\", fontname = monospace, fontsize = 12]";
            eprintln!("digraph {{\n{}\n{}\n}}", graphviz_format, graph);
        }),
        // This case is validated at the command-line parsing time, and we
        // reject everything not specified there. This is why this can never
        // happen, unless a solar flare changes a single bit.
        Some(_) => unreachable!(),
        None => {}
    }
}

/// Runs a continuation on the contained value, if there is one.
trait WithContinuation<T> {
    /// Calls `f` with a reference to the contained value; does nothing when
    /// there is no value.
    fn then<F>(&self, f: F)
    where
        F: FnOnce(&T);
}

impl<T> WithContinuation<T> for std::option::Option<T> {
    fn then<F>(&self, f: F)
    where
        F: FnOnce(&T),
    {
        if let Some(value) = self {
            f(value);
        }
    }
}

/// A parsed expression tree.
#[derive(Debug)]
enum Expression {
    /// A single literal token.
    Literal(Token),
    /// A binary operation: operator token, left operand, right operand.
    Binary(Token, Box<Expression>, Box<Expression>),
}

/// A monotonically increasing id generator.
struct Counter {
    state: std::sync::atomic::AtomicUsize,
}

impl Counter {
    /// Creates a counter whose first returned value is 0.
    pub const fn new() -> Self {
        Self {
            state: std::sync::atomic::AtomicUsize::new(0),
        }
    }

    /// Returns the current value and advances the counter by one.
    pub fn next(&self) -> usize {
        // Only uniqueness of the returned ids matters, not ordering relative
        // to other memory operations, so `Relaxed` is sufficient.
        self.state
            .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
    }
}

/// Source of unique node ids for Graphviz output.
static GLOBAL_COUNTER: Counter = Counter::new();

impl Expression {
    /// Renders this expression as Graphviz statements (without the enclosing
    /// `digraph { ... }`), using `id` as the number of this node. Ids for
    /// child nodes are drawn from `GLOBAL_COUNTER`.
    pub fn create_graphviz_graph(&self, id: usize) -> String {
        match self {
            Expression::Literal(token) => {
                format!("Node{} [label = \"{}\"]", id, token)
            }
            Expression::Binary(op, left, right) => {
                // Allocate both child ids before recursing so siblings get
                // consecutive numbers.
                let left_id = GLOBAL_COUNTER.next();
                let right_id = GLOBAL_COUNTER.next();
                format!(
                    "Node{} -> {{ Node{} Node{} }}\nNode{} [label = \"{}\"]\n{}\n{}",
                    id,
                    left_id,
                    right_id,
                    id,
                    op,
                    left.create_graphviz_graph(left_id),
                    right.create_graphviz_graph(right_id)
                )
            }
        }
    }
}

/// Parses `literal ('+' expression)?` from `tokens`.
///
/// Returns `None` when the stream does not start with an integer literal, or
/// when a `+` is not followed by a valid expression. An expression that ends
/// the input is accepted.
fn parse_expression(tokens: &mut TokenStream<'_>) -> Option<Expression> {
    let lhs = match tokens.next()? {
        token @ Token::IntegerLiteral(_) => Expression::Literal(token),
        _ => return None,
    };

    match tokens.next() {
        Some(operator @ Token::Plus) => {
            let rhs = parse_expression(tokens)?;
            Some(Expression::Binary(operator, Box::new(lhs), Box::new(rhs)))
        }
        // If it's not a valid operator (or the input ended), the expression is
        // just the literal and the caller continues with the token stream it
        // has provided to us.
        // NOTE(review): the non-operator token inspected here has already been
        // consumed from the stream — confirm callers do not need it.
        _ => Some(lhs),
    }
}

/// A cursor-based tokenizer over a source string.
struct TokenStream<'a> {
    source: &'a str,
    /// Byte offset into `source` of the next unread character.
    cursor: usize,
}

impl<'a> TokenStream<'a> {
    /// Creates a token stream positioned at the start of `source`.
    pub fn from(source: &'a str) -> Self {
        Self { source, cursor: 0 }
    }

    /// Iterator over the characters that have not been consumed yet. It
    /// borrows from the source text, not from the stream itself.
    fn chars(&self) -> std::str::Chars<'a> {
        self.source[self.cursor..].chars()
    }

    /// Advances the cursor past any leading whitespace, including whitespace
    /// that runs up to the end of the input.
    pub fn skip_whitespace(&mut self) {
        let skipped: usize = self
            .chars()
            .take_while(|c| c.is_whitespace())
            .map(char::len_utf8)
            .sum();
        self.cursor += skipped;
    }

    /// Reads the token starting exactly at the cursor and advances past it.
    ///
    /// Returns `None` at the end of the input.
    ///
    /// # Panics
    ///
    /// Panics (via `todo!`) on any character that does not start a supported
    /// token, including whitespace.
    pub fn parse_next(&mut self) -> Option<Token> {
        let mut chars = self.chars();
        let token = match chars.next()? {
            '+' => Token::Plus,
            ';' => Token::Semicolon,
            first if first.is_numeric() => {
                let start = self.cursor;
                // The literal extends over every following numeric character;
                // reaching the end of the input simply ends the literal.
                let rest: usize = chars
                    .take_while(|c| c.is_numeric())
                    .map(char::len_utf8)
                    .sum();
                let end = start + first.len_utf8() + rest;
                Token::IntegerLiteral(OffsetStr::from(&self.source[start..end]))
            }
            c => todo!(
                "character unsupported: `{}`",
                natural_char_representation(c)
            ),
        };

        self.cursor += token.len();
        Some(token)
    }

    /// Skips whitespace and returns the next token, or `None` at the end of
    /// the input.
    pub fn next(&mut self) -> Option<Token> {
        self.skip_whitespace();
        self.parse_next()
    }
}

/// Maps invisible characters to a visible stand-in for use in diagnostics;
/// every other character is returned unchanged.
fn natural_char_representation(c: char) -> char {
    match c {
        ' ' => '␣',
        '\t' => '→',
        '\n' => '⏎',
        _ => c,
    }
}

/// A lifetime-erased view (pointer + byte length) of a string slice.
///
/// The type does not track the lifetime of the text it points to: the string
/// passed to [`OffsetStr::from`] must stay alive and unmodified for as long as
/// the `OffsetStr` (or any copy of it) is formatted.
#[derive(Copy, Clone)]
struct OffsetStr {
    data: *const u8,
    length: usize,
}

impl std::fmt::Display for OffsetStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // SAFETY: `data`/`length` were taken from a `&str` in
        // `OffsetStr::from`; this is sound as long as that string is still
        // alive, which is the documented contract of this type.
        let bytes = unsafe { std::slice::from_raw_parts(self.data, self.length) };
        let text = std::str::from_utf8(bytes)
            .expect("OffsetStr is only constructed from valid UTF-8 `&str` data");
        f.write_str(text)
    }
}

impl std::fmt::Debug for OffsetStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, r#""{}""#, self)
    }
}

impl OffsetStr {
    /// Captures the address and byte length of `s`. See the type-level
    /// documentation for the lifetime contract.
    pub fn from(s: &str) -> Self {
        Self {
            data: s.as_ptr(),
            length: s.len(),
        }
    }
}

/// A lexical token.
#[derive(Debug, Copy, Clone)]
enum Token {
    /// The `+` operator.
    Plus,
    /// The `;` terminator.
    Semicolon,
    /// A run of numeric characters, referencing the source text.
    IntegerLiteral(OffsetStr),
}

impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::Plus => f.write_str("+"),
            Token::Semicolon => f.write_str(";"),
            // Format the literal's text; formatting `self` here would recurse
            // forever.
            Token::IntegerLiteral(digits) => write!(f, "{}", digits),
        }
    }
}

impl Token {
    /// Length of the token's source text in bytes.
    pub fn len(&self) -> usize {
        match self {
            Token::Plus | Token::Semicolon => 1,
            Token::IntegerLiteral(digits) => digits.length,
        }
    }
}