Amo/src/parser/mod.rs

271 lines
6.7 KiB
Rust

//! Tools to convert a stream of tokens into a parse tree
//!
//! Parsing is the stage that follows lexing, wherein a linear stream of tokens is
//! converted into a tree structure that reflects the syntax of the language. This is
//! also where syntax errors are caught.
//!
//! amo uses a push-based parsing system, where state is represented using a
//! [`PartialTree`], which can [`accept()`](PartialTree::accept) new tokens, producing a
//! new state.
//!
//! The root of the parser is typically [`crate::parser::program`].
use std::{ops::Range, mem::{Discriminant, self}};
use logos::Lexer;
use crate::{token::{Token, InfixRank1, InfixRank6, InfixRank5, InfixRank3, InfixRank2, InfixRank4}, cons};
use std::mem::discriminant;
pub mod program;
pub mod declaration;
pub mod expr;
pub mod pattern;
pub mod infix;
/// A syntax-tree node that can be read directly off a [`WrappedLexer`].
pub trait Parsable: Sized {
    /// Describes which tokens may begin this node.
    ///
    /// Returns the discriminants of the tokens the node can start with, and a
    /// flag that is `true` when the node may also match without consuming any
    /// token at all (a "zero-width" match).
    fn expected() -> (Vec<Discriminant<Token>>, bool);

    /// Parses one instance of this node from `l`, or reports where and why
    /// parsing failed.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError>;

    /// Reports whether this node could start at token `l`.
    ///
    /// A node that may be zero-width matches every token; otherwise the
    /// token's discriminant has to be one of those listed by
    /// [`expected()`](Parsable::expected).
    fn matches(l: &Token) -> bool {
        match Self::expected() {
            (_, true) => true,
            (starters, false) => {
                let kind = discriminant(l);
                starters.iter().any(|candidate| *candidate == kind)
            }
        }
    }
}
/// Information about a parsing error that happened somewhere
#[derive(Debug)]
pub struct ParseError {
/// The span of the token that the parse error occurred on, as reported by the lexer
pub location: Range<usize>,
/// The kinds of token (as discriminants) that would have been accepted at that location
pub expected: Vec<Discriminant<Token>>,
}
/// A [`Lexer`] bundled with a single token of lookahead.
///
/// Field `0` is the underlying lexer and field `1` is the buffered "current"
/// token. Once the lexer is exhausted the current token is [`Token::EOF`].
pub struct WrappedLexer<'a>(Lexer<'a, Token>, Token);

impl<'a> WrappedLexer<'a> {
    /// Wraps `l`, immediately pulling its first token in as the lookahead
    /// (or [`Token::EOF`] if the lexer yields nothing).
    pub fn new(mut l: Lexer<'a, Token>) -> WrappedLexer {
        let first = match l.next() {
            Some(token) => token,
            None => Token::EOF,
        };
        Self(l, first)
    }

    /// Borrows the current (lookahead) token without consuming it.
    pub fn curtok(&self) -> &Token {
        let WrappedLexer(_, current) = self;
        current
    }

    /// Consumes the current token and returns it, refilling the lookahead
    /// from the lexer (or with [`Token::EOF`] when the lexer is exhausted).
    pub fn monch(&mut self) -> Token {
        let upcoming = self.0.next().unwrap_or(Token::EOF);
        // Install the new lookahead and hand back the token it displaces.
        mem::replace(&mut self.1, upcoming)
    }

    /// Returns the underlying lexer's current span.
    ///
    /// The lexer is always advanced one token ahead to fill the lookahead, so
    /// this is the span belonging to the lexer's most recent `next()` call.
    pub fn span(&self) -> Range<usize> {
        let WrappedLexer(lexer, _) = self;
        lexer.span()
    }
}
/// A boxed node parses exactly like the node itself; the result is simply
/// moved to the heap.
impl<P: Parsable> Parsable for Box<P> {
    /// Same starting tokens and zero-width flag as `P`.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let inner = P::expected();
        inner
    }

    /// Parses a `P` and boxes it; errors from `P` are passed through.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let node = P::parse(l)?;
        Ok(Box::new(node))
    }
}
/// An optional node: parsed when the current token can start a `P`, otherwise
/// skipped without consuming anything.
impl<P: Parsable> Parsable for Option<P> {
    /// Same starting tokens as `P`, but always flagged as possibly zero-width.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let (starters, _) = P::expected();
        (starters, true)
    }

    /// Returns `Ok(None)` when the current token cannot start a `P`;
    /// otherwise parses a `P` and wraps it in `Some`, forwarding any error.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        if !P::matches(l.curtok()) {
            return Ok(None);
        }
        let node = P::parse(l)?;
        Ok(Some(node))
    }
}
/// A sequence of two nodes: an `A` immediately followed by a `B`.
impl<A: Parsable, B: Parsable> Parsable for (A, B) {
    /// The pair starts with whatever `A` starts with. If `A` may be
    /// zero-width, the pair may also start with whatever `B` starts with, and
    /// is itself zero-width exactly when `B` is.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let (mut starters, first_may_be_empty) = A::expected();
        if !first_may_be_empty {
            return (starters, false);
        }
        let (second_starters, second_may_be_empty) = B::expected();
        starters.extend(second_starters);
        (starters, second_may_be_empty)
    }

    /// Parses `A` then `B`, stopping at the first error.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        // Tuple fields are evaluated left to right, so `A` is parsed first.
        Ok((A::parse(l)?, B::parse(l)?))
    }
}
/// A `String` is parsed from a single [`Token::Symbol`], yielding its payload.
impl Parsable for String {
    /// Only a `Token::Symbol` can start a `String`; it is never zero-width.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        // The payload is irrelevant: `discriminant` only looks at the variant.
        let symbol_kind = discriminant(&Token::Symbol(String::new()));
        (vec![symbol_kind], false)
    }

    /// Consumes the current token and returns its text if it is a symbol.
    ///
    /// The token is consumed even when it is not a symbol; in that case the
    /// error carries the span that was current before consumption.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        if let Token::Symbol(name) = l.monch() {
            return Ok(name);
        }
        Err(ParseError {
            location,
            expected: Self::expected().0,
        })
    }
}
/// A run of zero or more consecutive `P` nodes with no separator.
impl<P: Parsable> Parsable for Vec<P> {
    /// Same starting tokens as `P`, but always flagged as possibly zero-width
    /// because an empty run is accepted.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let (starters, _) = P::expected();
        (starters, true)
    }

    /// Collects `P` nodes for as long as the current token can start one.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let seed: Vec<P> = Vec::new();
        vec_parse_inner(l, seed)
    }
}
/// Appends `P` nodes to `collected` for as long as the current token can
/// start one, then returns the accumulated vector.
///
/// Written as a loop rather than self-recursion: Rust does not guarantee
/// tail-call elimination, so recursing once per element could exhaust the
/// stack on long inputs.
///
/// # Errors
///
/// Returns the first error produced by `P::parse`; elements collected so far
/// are dropped.
///
/// Note: if `P` can match zero-width, `P::matches` is always true, so this
/// only terminates when `P::parse` eventually fails.
fn vec_parse_inner<P: Parsable>(
    l: &mut WrappedLexer,
    collected: Vec<P>,
) -> Result<Vec<P>, ParseError> {
    let mut collected = collected;
    while P::matches(l.curtok()) {
        let new_element = P::parse(l)?;
        collected = cons(collected, new_element);
    }
    Ok(collected)
}
/// Parses a possibly empty list of `P` nodes separated by tokens whose
/// discriminant equals `delineator`.
///
/// Parsing stops at the first position where a `P` cannot start or where an
/// element is not followed by the delineator. A delineator followed by
/// something that cannot start a `P` is consumed and ends the list, so a
/// trailing delineator is tolerated.
///
/// # Errors
///
/// Returns the first error produced while parsing an element.
pub fn parse_delineated_vec<P: Parsable>(
    l: &mut WrappedLexer,
    delineator: Discriminant<Token>,
) -> Result<Vec<P>, ParseError> {
    let seed: Vec<P> = Vec::new();
    parse_delineated_vec_inner(l, delineator, seed)
}
/// Extends `acc` with `P` nodes separated by tokens of kind `d`.
///
/// Each round parses one element if the current token can start a `P`, then
/// consumes a following `d` token if present and repeats. The list ends when
/// no `P` can start (including right after a delineator) or when an element
/// is not followed by `d`.
///
/// Written as a loop rather than self-recursion: Rust does not guarantee
/// tail-call elimination, so recursing once per element could exhaust the
/// stack on long lists.
///
/// # Errors
///
/// Returns the first error produced by `P::parse`; elements collected so far
/// are dropped.
fn parse_delineated_vec_inner<P: Parsable>(
    l: &mut WrappedLexer,
    d: Discriminant<Token>,
    acc: Vec<P>,
) -> Result<Vec<P>, ParseError> {
    let mut acc = acc;
    while P::matches(l.curtok()) {
        acc = cons(acc, P::parse(l)?);
        if discriminant(l.curtok()) != d {
            // No separator after this element: the list is complete.
            break;
        }
        // Swallow the separator and look for another element.
        l.monch();
    }
    Ok(acc)
}
/// Consumes and returns the current token if its discriminant is `t`.
///
/// # Errors
///
/// If the current token is of a different kind, nothing is consumed and a
/// [`ParseError`] is returned carrying the lexer's current span and `t` as
/// the sole expected kind.
pub fn absorb_token_or_error(
    l: &mut WrappedLexer,
    t: Discriminant<Token>,
) -> Result<Token, ParseError> {
    if discriminant(l.curtok()) != t {
        return Err(ParseError {
            location: l.span(),
            expected: vec![t],
        });
    }
    Ok(l.monch())
}
/// A rank-1 infix operator, read from a single [`Token::R1Infix`].
impl Parsable for InfixRank1 {
    /// Only a `Token::R1Infix` can start this node; it is never zero-width.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        // `LOr` is an arbitrary payload: only the outer variant is compared.
        let kind = discriminant(&Token::R1Infix(InfixRank1::LOr));
        (vec![kind], false)
    }

    /// Consumes the current token and returns its operator if it is a
    /// rank-1 infix token.
    ///
    /// The token is consumed even on mismatch; the error then carries the
    /// span that was current before consumption.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        if let Token::R1Infix(op) = l.monch() {
            Ok(op)
        } else {
            Err(ParseError {
                location,
                expected: Self::expected().0,
            })
        }
    }
}
/// A rank-2 infix operator, read from a single [`Token::R2Infix`].
impl Parsable for InfixRank2 {
    /// Only a `Token::R2Infix` can start this node; it is never zero-width.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        // `LAnd` is an arbitrary payload: only the outer variant is compared.
        let kind = discriminant(&Token::R2Infix(InfixRank2::LAnd));
        (vec![kind], false)
    }

    /// Consumes the current token and returns its operator if it is a
    /// rank-2 infix token.
    ///
    /// The token is consumed even on mismatch; the error then carries the
    /// span that was current before consumption.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        if let Token::R2Infix(op) = l.monch() {
            Ok(op)
        } else {
            Err(ParseError {
                location,
                expected: Self::expected().0,
            })
        }
    }
}
/// A rank-3 infix operator, read from a single [`Token::R3Infix`].
impl Parsable for InfixRank3 {
    /// Only a `Token::R3Infix` can start this node; it is never zero-width.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        // `Eq` is an arbitrary payload: only the outer variant is compared.
        let kind = discriminant(&Token::R3Infix(InfixRank3::Eq));
        (vec![kind], false)
    }

    /// Consumes the current token and returns its operator if it is a
    /// rank-3 infix token.
    ///
    /// The token is consumed even on mismatch; the error then carries the
    /// span that was current before consumption.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        if let Token::R3Infix(op) = l.monch() {
            Ok(op)
        } else {
            Err(ParseError {
                location,
                expected: Self::expected().0,
            })
        }
    }
}
/// A rank-4 infix operator, read from a single [`Token::R4Infix`].
impl Parsable for InfixRank4 {
    /// Only a `Token::R4Infix` can start this node; it is never zero-width.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        // `Range` is an arbitrary payload: only the outer variant is compared.
        let kind = discriminant(&Token::R4Infix(InfixRank4::Range));
        (vec![kind], false)
    }

    /// Consumes the current token and returns its operator if it is a
    /// rank-4 infix token.
    ///
    /// The token is consumed even on mismatch; the error then carries the
    /// span that was current before consumption.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        if let Token::R4Infix(op) = l.monch() {
            Ok(op)
        } else {
            Err(ParseError {
                location,
                expected: Self::expected().0,
            })
        }
    }
}
/// A rank-5 infix operator, read from a single [`Token::R5Infix`].
impl Parsable for InfixRank5 {
    /// Only a `Token::R5Infix` can start this node; it is never zero-width.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        // `Add` is an arbitrary payload: only the outer variant is compared.
        let kind = discriminant(&Token::R5Infix(InfixRank5::Add));
        (vec![kind], false)
    }

    /// Consumes the current token and returns its operator if it is a
    /// rank-5 infix token.
    ///
    /// The token is consumed even on mismatch; the error then carries the
    /// span that was current before consumption.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        if let Token::R5Infix(op) = l.monch() {
            Ok(op)
        } else {
            Err(ParseError {
                location,
                expected: Self::expected().0,
            })
        }
    }
}
/// A rank-6 infix operator, read from a single [`Token::R6Infix`].
impl Parsable for InfixRank6 {
    /// Only a `Token::R6Infix` can start this node; it is never zero-width.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        // `Mul` is an arbitrary payload: only the outer variant is compared.
        let kind = discriminant(&Token::R6Infix(InfixRank6::Mul));
        (vec![kind], false)
    }

    /// Consumes the current token and returns its operator if it is a
    /// rank-6 infix token.
    ///
    /// The token is consumed even on mismatch; the error then carries the
    /// span that was current before consumption.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        if let Token::R6Infix(op) = l.monch() {
            Ok(op)
        } else {
            Err(ParseError {
                location,
                expected: Self::expected().0,
            })
        }
    }
}