// Driver for the `pine` compiler front end: command-line handling, a small
// tokenizer and a precedence-climbing expression parser that can be dumped as
// a Graphviz graph.
//
// NOTE(review): the generic argument lists in this file (`Vec<String>`,
// `Option<usize>`, `Box<Expression>`, `Then<T>`, ...) were reconstructed from
// usage; confirm they match the intended types.

// Try to keep this string updated with the argument parsing, otherwise it will
// get confusing for users.
static USAGE: &str = "usage: pine [options] input

options:
    --help              print all options

unstable options:
    --unpretty val      print un-prettified representation of the source code
                        valid options for `val` are: dot, graphdotviz (dot-compatible graph)";

/// What the command line asked the driver to do.
#[derive(Debug, PartialEq)]
enum Invocation<'a> {
    /// `--help` was given: print the usage text and stop.
    Help,
    /// Compile `path`, optionally dumping the parsed expression in the
    /// `--unpretty` format named by `output_pretty`.
    Compile {
        path: &'a str,
        output_pretty: Option<&'a str>,
    },
}

/// Prints `message` to stderr with the `pine: error:` prefix and exits the
/// process with status 1.
fn fatal(message: &str) -> ! {
    eprintln!("pine: \x1b[1;31merror\x1b[0m: {}", message);
    std::process::exit(1);
}

/// Interprets the command-line arguments (without the executable name).
///
/// Arguments are processed left to right and the first problem found is
/// reported, so `--help` wins over any invalid argument that follows it, and
/// an invalid argument wins over a `--help` that follows it.
///
/// # Errors
///
/// Returns the diagnostic text (without the `pine: error:` prefix) when an
/// argument is unknown, `--unpretty` is missing or has an invalid value, more
/// than one input file is named, or no input file is named at all.
fn parse_arguments(args: &[String]) -> Result<Invocation<'_>, String> {
    let mut path: Option<&str> = None;
    let mut output_pretty: Option<&str> = None;

    let mut remaining = args.iter().map(String::as_str);
    while let Some(arg) = remaining.next() {
        if let Some(name) = arg.strip_prefix("--") {
            match name {
                "help" => return Ok(Invocation::Help),
                "unpretty" => {
                    // The value is the following argument; consume it here so
                    // the loop does not treat it as an input file.
                    output_pretty = match remaining.next() {
                        Some(opt @ ("dot" | "graphdotviz")) => Some(opt),
                        Some(opt) => {
                            return Err(format!("invalid option '{}' to '{}'", opt, arg));
                        }
                        None => return Err(format!("expected option to '{}'", arg)),
                    };
                }
                _ => return Err(format!("unknown argument '{}'", arg)),
            }
        } else if arg.starts_with('-') {
            // We don't handle arguments that start with a single dash, this
            // might be added later. For now we just report an error.
            return Err(format!("unknown argument '{}'", arg));
        } else if let Some(first) = path {
            return Err(format!(
                "multiple file names provided (first two are `{}` and `{}`)",
                first, arg
            ));
        } else {
            // Use this argument as an input file.
            path = Some(arg);
        }
    }

    // Unlike i.e. rustc, we don't print full usage information when invoked
    // with no arguments; we behave more like clang or go and just complain.
    match path {
        Some(path) => Ok(Invocation::Compile { path, output_pretty }),
        None => Err(String::from("no input files")),
    }
}

fn main() {
    // Throw away the first argument, which usually is the executable name.
    let args = std::env::args().skip(1).collect::<Vec<String>>();

    let (path, output_pretty) = match parse_arguments(&args) {
        Ok(Invocation::Help) => {
            println!("{}\n", USAGE);
            return;
        }
        Ok(Invocation::Compile { path, output_pretty }) => (path, output_pretty),
        Err(message) => fatal(&message),
    };

    eprintln!("compiling `{}`", path);

    // NOTE: the input file is not read yet; a fixed expression is parsed
    // instead.
    const SOURCE: &str = "+17 + 23 + +21;";
    let mut tokens = TokenStream::from(SOURCE);
    let expr = parse_expression(&mut tokens, 0);
    eprintln!("{:?}", expr);

    match output_pretty {
        Some("dot" | "graphdotviz") => expr.then(|e| {
            let graph = e.create_graphviz_graph(GLOBAL_COUNTER.next());
            let graphviz_format = "node [shape = box, style = filled, color = \"#bfd1e5\", fontname = monospace, fontsize = 12]";
            eprintln!("digraph {{\n{}\n{}\n}}", graphviz_format, graph);
        }),
        // This case is validated at the command-line parsing time, and we
        // reject everything not specified there, so it cannot be reached.
        Some(_) => unreachable!(),
        None => {}
    }
}

/// Represents a type characterised by a parameter `T` (either the type itself,
/// or a type inside it, as in case of `Option`), on which we can call a
/// procedure that doesn't return anything.
///
/// This is similar to the `and_then` method that `Option` and `Result` expose;
/// the difference is that it is only useful for procedures that perform side
/// effects, as neither the original value nor a new value is returned.
///
/// The method name mirrors `bool::then`: the procedure is called if the value
/// is `Some`, and nothing happens if it is `None`.
trait Then<T> {
    /// Calls `f` with a reference to the contained value, if there is one.
    fn then<F>(&self, f: F)
    where
        F: FnOnce(&T);
}

impl<T> Then<T> for std::option::Option<T> {
    fn then<F>(&self, f: F)
    where
        F: FnOnce(&T),
    {
        if let Some(value) = self {
            f(value);
        }
    }
}

/// A parsed expression tree.
enum Expression {
    /// A single literal token.
    Literal(Token),
    /// A prefix operator applied to an operand.
    Unary(Token, Box<Expression>),
    /// An infix operator with its left and right operands.
    Binary(Token, Box<Expression>, Box<Expression>),
}

impl std::fmt::Debug for Expression {
    /// Formats literals as the token text, unary expressions as
    /// `<op><operand>` and binary expressions as `(<op> <left> <right>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Expression::Literal(token) => write!(f, "{}", token),
            Expression::Unary(token, expr) => write!(f, "{}{:?}", token, expr),
            Expression::Binary(token, left, right) => {
                write!(f, "({} {:?} {:?})", token, left, right)
            }
        }
    }
}

/// Hands out consecutive integers starting at 0.
struct Counter {
    state: std::sync::atomic::AtomicUsize,
}

impl Counter {
    /// Creates a counter whose first value is 0.
    pub const fn new() -> Self {
        Self {
            state: std::sync::atomic::AtomicUsize::new(0),
        }
    }

    /// Returns the current value and advances the counter by one.
    pub fn next(&self) -> usize {
        // Only uniqueness of the returned values matters, so relaxed ordering
        // is sufficient.
        self.state
            .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
    }
}

/// Source of unique Graphviz node ids.
static GLOBAL_COUNTER: Counter = Counter::new();

impl Expression {
    /// Renders this expression as Graphviz statements, using `Node<id>` for
    /// the root. Child node ids are drawn from `GLOBAL_COUNTER`.
    ///
    /// The result contains only node and edge statements; the caller wraps
    /// them in `digraph { ... }`.
    pub fn create_graphviz_graph(&self, id: usize) -> String {
        match self {
            Expression::Literal(token) => {
                format!("Node{} [label = \"{}\"]", id, token)
            }
            Expression::Unary(op, expr) => {
                let expr_id = GLOBAL_COUNTER.next();
                let operand = expr.create_graphviz_graph(expr_id);
                format!(
                    "Node{} -> Node{}\nNode{} [label = \"{}\"]\n{}",
                    id, expr_id, id, op, operand
                )
            }
            Expression::Binary(op, left, right) => {
                // Reserve both child ids before recursing so siblings get
                // adjacent ids.
                let left_id = GLOBAL_COUNTER.next();
                let right_id = GLOBAL_COUNTER.next();
                let left_graph = left.create_graphviz_graph(left_id);
                let right_graph = right.create_graphviz_graph(right_id);
                format!(
                    "Node{} -> {{ Node{} Node{} }}\nNode{} [label = \"{}\"]\n{}\n{}",
                    id, left_id, right_id, id, op, left_graph, right_graph
                )
            }
        }
    }
}

/// Binding power of `token` when used as a prefix operator, or `None` if it
/// is not one.
fn unary_precedence(token: Token) -> Option<usize> {
    match token {
        Token::Plus => Some(2),
        _ => None,
    }
}

/// Binding power of `token` when used as an infix operator, or `None` if it
/// is not one.
fn binary_precedence(token: Token) -> Option<usize> {
    match token {
        Token::Plus => Some(1),
        _ => None,
    }
}

/// Parses one expression from `tokens` by precedence climbing.
///
/// Only infix operators binding tighter than `highest_precedence` are
/// consumed, which makes operators of equal precedence left-associative. Pass
/// `0` to parse a full expression. Parsing stops in front of the first token
/// that cannot continue the expression (for example `;`) and leaves it in the
/// stream for the caller.
///
/// Returns `None` when the stream does not start with an operand or when an
/// operator is not followed by one.
fn parse_expression(tokens: &mut TokenStream<'_>, highest_precedence: usize) -> Option<Expression> {
    let mut lhs = match tokens.next()? {
        token @ Token::IntegerLiteral(_) => Expression::Literal(token),
        token => {
            let precedence = unary_precedence(token)?;
            let operand = parse_expression(tokens, precedence)?;
            Expression::Unary(token, Box::new(operand))
        }
    };

    loop {
        // End of input or a token that is not an infix operator ends the
        // expression; what has been parsed so far is the result.
        let operator = match tokens.peek() {
            Some(token) => token,
            None => return Some(lhs),
        };
        let precedence = match binary_precedence(operator) {
            Some(precedence) if precedence > highest_precedence => precedence,
            _ => return Some(lhs),
        };

        // Consume the operator that was only peeked at so far.
        tokens.next();
        let rhs = parse_expression(tokens, precedence)?;
        lhs = Expression::Binary(operator, Box::new(lhs), Box::new(rhs));
    }
}

/// Splits a source string into tokens on demand.
struct TokenStream<'a> {
    source: &'a str,
    /// Byte offset into `source` of the first unconsumed character.
    cursor: usize,
    /// Token produced by the latest `peek` that has not been consumed yet.
    last: Option<Token>,
}

impl<'a> TokenStream<'a> {
    /// Creates a stream positioned at the start of `source`.
    pub fn from(source: &'a str) -> Self {
        Self {
            source,
            cursor: 0,
            last: None,
        }
    }

    /// The part of the source that has not been consumed yet.
    fn rest(&self) -> &'a str {
        &self.source[self.cursor..]
    }

    /// Advances the cursor past any leading whitespace.
    pub fn skip_whitespace(&mut self) {
        let rest = self.rest();
        self.cursor += rest.len() - rest.trim_start().len();
    }

    /// Reads the token that starts exactly at the cursor without consuming
    /// it. Whitespace is not skipped.
    ///
    /// Returns `None` at end of input.
    ///
    /// # Panics
    ///
    /// Panics on any character that does not start a supported token.
    pub fn parse_next(&self) -> Option<Token> {
        let rest = self.rest();
        let token = match rest.chars().next()? {
            '+' => Token::Plus,
            ';' => Token::Semicolon,
            c if c.is_numeric() => {
                // The literal extends over the whole run of numeric
                // characters, which may reach the end of the input.
                let length: usize = rest
                    .chars()
                    .take_while(|c| c.is_numeric())
                    .map(char::len_utf8)
                    .sum();
                Token::IntegerLiteral(OffsetStr::from(&rest[..length]))
            }
            c => todo!("character unsupported: `{}`", natural_char_representation(c)),
        };
        Some(token)
    }

    /// Consumes and returns the next token, skipping leading whitespace.
    /// Returns `None` at end of input.
    pub fn next(&mut self) -> Option<Token> {
        let token = match self.last.take() {
            // `peek` already skipped the whitespace in front of this token.
            Some(token) => token,
            None => {
                self.skip_whitespace();
                self.parse_next()?
            }
        };
        self.cursor += token.len();
        Some(token)
    }

    /// Returns the next token without consuming it, skipping leading
    /// whitespace. Returns `None` at end of input.
    pub fn peek(&mut self) -> Option<Token> {
        self.skip_whitespace();
        self.last = self.parse_next();
        self.last
    }
}

/// Maps whitespace characters to visible stand-ins for use in diagnostics;
/// every other character is returned unchanged.
fn natural_char_representation(c: char) -> char {
    match c {
        ' ' => '␣',
        '\t' => '→',
        '\n' => '⏎',
        _ => c,
    }
}

/// A `Copy` view of a string slice that does not carry the slice's lifetime.
///
/// The string it was created from must outlive every use of the value; this
/// is not checked by the compiler.
#[derive(Copy, Clone)]
struct OffsetStr {
    data: *const u8,
    length: usize,
}

impl std::fmt::Display for OffsetStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // SAFETY: `data` and `length` come from a `&str` in `OffsetStr::from`;
        // the bytes are valid as long as that string is still alive, which is
        // the documented requirement of this type.
        let bytes = unsafe { std::slice::from_raw_parts(self.data, self.length) };
        let text = std::str::from_utf8(bytes).expect("OffsetStr is always built from a valid &str");
        f.write_str(text)
    }
}

impl std::fmt::Debug for OffsetStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, r#""{}""#, self)
    }
}

impl OffsetStr {
    /// Captures the location and length of `s`.
    pub fn from(s: &str) -> Self {
        Self {
            data: s.as_ptr(),
            length: s.len(),
        }
    }
}

/// A lexical token.
#[derive(Debug, Copy, Clone)]
enum Token {
    Plus,
    Semicolon,
    /// A run of numeric characters, referring to its text in the source.
    IntegerLiteral(OffsetStr),
}

impl std::fmt::Display for Token {
    /// Writes the token as it appears in source code.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::Plus => f.write_str("+"),
            Token::Semicolon => f.write_str(";"),
            Token::IntegerLiteral(s) => write!(f, "{}", s),
        }
    }
}

impl Token {
    /// Length of the token's source text in bytes.
    pub fn len(&self) -> usize {
        match self {
            Token::Plus | Token::Semicolon => 1,
            Token::IntegerLiteral(i) => i.length,
        }
    }
}