pine/src/main.rs

233 lines
5.2 KiB
Rust

// Help text printed in response to `--help`. Whenever the argument handling in
// `main` changes, update this string as well; a stale usage message is worse
// for users than none at all.
static USAGE: &str = "usage: pine [options] input
options:
--help print all options";
/// Entry point: validates the command line, then tokenizes and parses a
/// snippet and prints the resulting expression to stderr.
///
/// Exits with status 1 on any command-line error (no input, unknown option,
/// more than one input file). `--help` prints `USAGE` and returns normally.
fn main() {
    // The first argument is usually the executable name, so it is dropped.
    let args: Vec<String> = std::env::args().skip(1).collect();
    // With nothing left to inspect, fail right away. Unlike e.g. rustc, a bare
    // invocation does not print the usage text; it reports the missing input
    // the way clang or go would.
    if args.is_empty() {
        eprintln!("pine: \x1b[1;31merror\x1b[0m: no input files");
        std::process::exit(1);
    }
    // The input file named on the command line, once one has been seen.
    let mut input: Option<&str> = None;
    // Arguments are handled strictly in order, so the first problem encountered
    // is the one that gets reported.
    for arg in args.iter().map(String::as_str) {
        if arg.starts_with("--") {
            // Long option: everything after the two dashes is its name.
            match &arg[2..] {
                "help" => {
                    println!("{}\n", USAGE);
                    return;
                }
                _ => {
                    eprintln!("pine: \x1b[1;31merror\x1b[0m: unknown argument '{}'", arg);
                    std::process::exit(1);
                }
            }
        } else if arg.starts_with("-") {
            // Single-dash options are not supported (yet), so they are all
            // rejected as unknown.
            eprintln!("pine: \x1b[1;31merror\x1b[0m: unknown argument '{}'", arg);
            std::process::exit(1);
        } else {
            // A positional argument is the input file; only one is accepted.
            if let Some(first) = input {
                eprintln!("pine: \x1b[1;31merror\x1b[0m: multiple file names provided (first two are `{}` and `{}`)", first, arg);
                std::process::exit(1);
            }
            input = Some(arg);
        }
    }
    let path = match input {
        Some(path) => path,
        None => {
            eprintln!("pine: \x1b[1;31merror\x1b[0m: no input files");
            std::process::exit(1);
        }
    };
    eprintln!("compiling `{}`", path);
    // NOTE: the file at `path` is not read here; a fixed snippet is tokenized
    // and parsed instead.
    const SOURCE: &str = "23 + 21;";
    let mut tokens = TokenStream::from(SOURCE);
    let expr = parse_expression(&mut tokens);
    eprintln!("{:?}", expr);
}
/// A parsed expression tree, as built by `parse_expression`.
#[derive(Debug)]
enum Expression {
/// A single token standing for a value. `parse_expression` only ever wraps a
/// `Token::IntegerLiteral` here.
Literal(Token),
/// A binary operation: the operator token, followed by the left-hand and
/// right-hand operands.
Binary(Token, Box<Expression>, Box<Expression>),
}
/// Parses one expression from the front of `tokens`.
///
/// The grammar understood so far is an integer literal, optionally followed by
/// `+` and another expression; `a + b + c` therefore nests to the right.
///
/// Returns `None` when the stream does not start with an integer literal, or
/// when a `+` is not followed by a valid expression. A literal followed by the
/// end of input is a complete expression and is returned as such.
///
/// Note that the token right after the expression is consumed even when it is
/// not an operator (for example a trailing `;`): the function reads it with
/// `TokenStream::next` in order to decide whether the expression continues.
fn parse_expression(tokens: &mut TokenStream<'_>) -> Option<Expression> {
    let lhs = match tokens.next()? {
        token @ Token::IntegerLiteral(_) => Expression::Literal(token),
        _ => return None,
    };
    match tokens.next() {
        Some(operator @ Token::Plus) => {
            let rhs = parse_expression(tokens)?;
            Some(Expression::Binary(operator, Box::new(lhs), Box::new(rhs)))
        }
        // Anything that is not an operator -- including running out of
        // tokens -- ends the expression; the operand parsed so far is the
        // result.
        _ => Some(lhs),
    }
}
/// A cursor over source text that hands out `Token`s one at a time.
struct TokenStream<'a> {
    /// The complete source text being tokenized.
    source: &'a str,
    /// Byte offset into `source` of the first character not yet consumed.
    cursor: usize,
}

impl<'a> TokenStream<'a> {
    /// Creates a stream positioned at the start of `source`.
    pub fn from(source: &'a str) -> Self {
        Self { source, cursor: 0 }
    }

    /// Iterates over the characters that have not been consumed yet.
    ///
    /// The iterator borrows the source text rather than the stream itself, so
    /// the cursor may be updated while it is still alive.
    fn chars(&self) -> std::str::Chars<'a> {
        let source: &'a str = self.source;
        source[self.cursor..].chars()
    }

    /// Advances the cursor past any whitespace at the current position.
    ///
    /// Whitespace that runs up to the end of the input is skipped as well, so
    /// a following `parse_next` sees an empty remainder.
    pub fn skip_whitespace(&mut self) {
        let skipped: usize = self
            .chars()
            .take_while(|c| c.is_whitespace())
            .map(char::len_utf8)
            .sum();
        self.cursor += skipped;
    }

    /// Reads the token starting exactly at the cursor and advances past it.
    ///
    /// Returns `None` when no input is left. Whitespace is not skipped here;
    /// use `next` for that.
    ///
    /// # Panics
    ///
    /// Panics (via `todo!`) on any character that does not start a supported
    /// token.
    pub fn parse_next(&mut self) -> Option<Token> {
        let mut chars = self.chars();
        let token = match chars.next()? {
            '+' => Token::Plus,
            ';' => Token::Semicolon,
            first if first.is_numeric() => {
                let start = self.cursor;
                // The literal extends over every directly following numeric
                // character; reaching the end of input simply ends it.
                let length = first.len_utf8()
                    + chars
                        .take_while(|c| c.is_numeric())
                        .map(char::len_utf8)
                        .sum::<usize>();
                Token::IntegerLiteral(OffsetStr::from(&self.source[start..start + length]))
            }
            c => todo!("character unsupported: `{}`", natural_char_representation(c)),
        };
        self.cursor += token.len();
        Some(token)
    }

    /// Skips leading whitespace, then returns the next token (if any).
    pub fn next(&mut self) -> Option<Token> {
        self.skip_whitespace();
        self.parse_next()
    }
}
/// Maps whitespace characters that are hard to see in a message to a visible
/// stand-in symbol; every other character is returned unchanged.
fn natural_char_representation(c: char) -> char {
    // (character, symbol shown in its place)
    const VISIBLE: [(char, char); 3] = [(' ', '␣'), ('\t', '→'), ('\n', '⏎')];
    VISIBLE
        .iter()
        .find(|(raw, _)| *raw == c)
        .map_or(c, |&(_, shown)| shown)
}
/// A string slice stored as a raw pointer plus a byte length, which makes it
/// `Copy` and free of lifetime parameters.
///
/// NOTE(review): nothing ties an `OffsetStr` to the string it was created
/// from. Whoever creates one must keep that string alive and unmodified for as
/// long as the `OffsetStr` may be formatted -- confirm this holds at every
/// call site.
#[derive(Copy, Clone)]
struct OffsetStr {
    /// Address of the first byte of the referenced text.
    data: *const u8,
    /// Length of the referenced text in bytes.
    length: usize,
}

impl std::fmt::Display for OffsetStr {
    /// Writes the referenced text, honouring width, fill, alignment and
    /// precision exactly like a `str` would.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegating to `str`'s implementation (rather than going through
        // `write!`) is what lets formatter flags such as `{:>8}` take effect.
        std::fmt::Display::fmt(self.as_str(), f)
    }
}

impl std::fmt::Debug for OffsetStr {
    /// Writes the referenced text surrounded by double quotes (no escaping).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, r#""{}""#, self)
    }
}

impl OffsetStr {
    /// Captures the address and length of `s` without borrowing it.
    pub fn from(s: &str) -> Self {
        Self {
            data: s.as_ptr(),
            length: s.len(),
        }
    }

    /// Re-creates the string slice this value was built from.
    fn as_str(&self) -> &str {
        // SAFETY: `data` and `length` are only ever set by `from`, which takes
        // them from a valid `&str`. This is sound only while that string is
        // still alive and unchanged, which the type cannot enforce (see the
        // note on the struct).
        let bytes = unsafe { std::slice::from_raw_parts(self.data, self.length) };
        std::str::from_utf8(bytes).expect("OffsetStr must reference valid UTF-8")
    }
}
/// A lexical token as produced by `TokenStream`.
#[derive(Debug, Copy, Clone)]
enum Token {
    /// The `+` character.
    Plus,
    /// The `;` character.
    Semicolon,
    /// An integer literal; the payload references its text.
    IntegerLiteral(OffsetStr),
}

impl std::fmt::Display for Token {
    /// Writes the token the way it is spelled in source text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::Plus => f.write_str("+"),
            Token::Semicolon => f.write_str(";"),
            // Format the literal's text, not the token itself: formatting the
            // token here would call this very function again without end.
            Token::IntegerLiteral(text) => write!(f, "{}", text),
        }
    }
}

impl Token {
    /// Returns the length in bytes of the token's source text.
    pub fn len(&self) -> usize {
        match self {
            Token::Plus | Token::Semicolon => 1,
            Token::IntegerLiteral(text) => text.length,
        }
    }
}