Added parsing!!

This commit is contained in:
Emi Simpson 2022-04-06 09:23:46 -04:00
parent 92ad2bc118
commit afbb45caac
Signed by: Emi
GPG key ID: A12F2C2FFDC3D847
14 changed files with 1078 additions and 118 deletions

44
grammar-initial.ebnf Normal file
View file

@ -0,0 +1,44 @@
<program> ::= { DeclarationStart <declaration> }
<declaration> ::= Type Symbol Assign <variant_decl> { VBar <variant_decl> }
| Symbol Colon <full_type> DeclarationStart Symbol { Symbol } Assign <expr>
<variant_decl> ::= Symbol { <tight_type> }
<full_type> ::= <tight_type> [ Comma [ <additional_type> ] [ Aro <full_type> ] ]
<additional_type> ::= <tight_type> [ Comma [ <additional_type> ] ]
<tight_type> ::= OpenParen <full_type> CloseParen
| Symbol [ OpenSquareBracket <domain> CloseSquareBracket ]
<domain> ::= <expr> [ Comma [ <domain> ] ]
<expr> ::= If <expr> <ifblock>
| <let> { <let> } In <expr>
| <rank1_expr>
<tightexpr> ::= Literal
| Symbol
| OpenParen <expr> CloseParen
<let> ::= Let Symbol Assign <expr>
<ifblock> ::= Is <case> { Comma <case> } [ Comma ]
| Then <expr> Else <expr>
<case> ::= <binding_pattern> Aro <expr>
<rank1_expr> ::= <rank2_expr> { <rank1_op> <rank2_expr> }
<rank2_expr> ::= <rank3_expr> { <rank2_op> <rank3_expr> }
<rank3_expr> ::= <rank4_expr> { <rank3_op> <rank4_expr> }
<rank4_expr> ::= <rank5_expr> { <rank5_expr> }
<rank5_expr> ::= <rank6_expr> { <rank5_op> <rank6_expr> }
<rank6_expr> ::= <rank7_expr> { <rank6_op> <rank7_expr> }
<rank7_expr> ::= <tightexpr> { <rank7_op> <tightexpr> }
<rank1_op> ::= LOr
<rank2_op> ::= LAnd
<rank3_op> ::= Eq | NEq | LessThan | GreaterThan
<rank5_op> ::= Range
<rank6_op> ::= Add | Sub
<rank7_op> ::= Mult | Div | Mod
<binding_pattern> ::= Symbol { <tight_binding_pattern> }
<tight_binding_pattern> ::= Literal
| Symbol
| OpenParen <binding_pattern> CloseParen

View file

@ -1,9 +1,9 @@
<program> ::= { DeclarationStart <declaration> }
<declaration> ::= Type Symbol [ <typeargs> ] { Symbol } Assign <variant_decl> { VBar <variant_decl> }
| Struct Symbol { Symbol } Assign { <def_field> }
| Trait Symbol [ <typeargs> ] [ On <composite_type> ] Needs { <def_field> }
| Impl Symbol [ <typerags> ] On <composite_type> Colon <impl> { <impl> }
<declaration> ::= Type Symbol { <typearg> } Assign <variant_decl> { VBar <variant_decl> }
| Struct Symbol { <typearg> } Assign { <def_field> }
| Trait Symbol { <typearg> } [ On <composite_type> ] Needs { <def_field> }
| Impl Symbol [ <typearg> ] On <composite_type> Colon <impl> { <impl> }
| Symbol <def>
<variant_declaration> ::= Symbol { <grouped_type> }
@ -15,15 +15,11 @@
<grouped_type> ::= <simple_type>
| OpenParen <full_type> CloseParen
<simple_type> ::= Symbol [ OpenSquareBracket <domain> CloseSquareBracket ]
<domain> ::= Int Range Int
| Int
<simple_type> ::= Symbol [ OpenSquareBracket <expr> CloseSquareBracket ]
<def_field> ::= Dot Symbol Colon <full_type>
<typeargs> ::= OpenSouareBracket <typearg> { , <typearg> } CloseSquareBracket
<typearg> ::= Symbol Colon <full_type>
<typearg> ::= Symbol [OpenSquareBracket <full_type> { Comma <full_type> } CloseSquareBracket]
<impl> ::= Dot Symbol { Symbol } Assign <expr>
@ -33,8 +29,7 @@
<expr> ::= <tightexpr> { <tightexpr> }
| If <expr> <ifblock>
<expr> ::= If <expr> <ifblock>
| <let> { <let> } In <expr>
| <infix_expr>
<tightexpr> ::= Literal
@ -51,19 +46,21 @@
<case> ::= <binding_pattern> Aro <expr>
<infix_expr> ::= <infix_rank2_expr> { <infix_rank1_op> <infix_rank2_expr> }
<infix_rank2_expr> ::= <infix_rank3_expr> { <infix_rank2_op> <infix_rank3_expr> }
<infix_rank3_expr> ::= <infix_rank4_expr> { <infix_rank3_op> <infix_rank4_expr> }
<infix_rank4_expr> ::= <infix_rank5_expr> { <infix_rank4_op> <infix_rank5_expr> }
<infix_rank5_expr> ::= <tightexpr> { <infix_rank5_op> <tightexpr> }
<infix_rank1_expr> ::= <infix_rank2_expr> [ <infix_rank1_op> <infix_rank1_expr> ]
<infix_rank2_expr> ::= <infix_rank3_expr> [ <infix_rank2_op> <infix_rank2_expr> ]
<infix_rank3_expr> ::= <infix_rank4_expr> [ <infix_rank3_op> <infix_rank3_expr> ]
<infix_rank4_expr> ::= <infix_rank5_expr> [ <infix_rank4_op> <infix_rank4_expr> ]
<infix_rank5_expr> ::= <call_or_access> [ <infix_rank5_op> <infix_rank5_expr> ]
<infix_rank1_op> ::= Mult | Div | Mod
<infix_rank2_op> ::= Add | Sub
<infix_rank3_op> ::= Eq | NEq | LessThan | GreaterThan
<infix_rank4_op> ::= LAnd
<infix_rank5_op> ::= LOr
<infix_rank3_op> ::= Range
<infix_rank4_op> ::= Eq | NEq | LessThan | GreaterThan
<infix_rank5_op> ::= LAnd
<infix_rank6_op> ::= LOr
<call_or_access> ::= <tightexpr> { <tightexpr> }
<binding_pattern> ::= Symbol <bp_innards>
| Literal
<bp_innards> ::= { <bp_field> }
| { <binding_pattern }
<bp_innards> ::= <bp_field> { <bp_field> }
| { <binding_pattern> }
<bp_field> ::= Dot Symbol Assign <binding_pattern>

16
sample-initial.amo Normal file
View file

@ -0,0 +1,16 @@
type MyType
= Variant1
| Variant2 int[usize]
| Variant3 str int[0..21]
myFunc: str, int -> MyType
myFunc strval intval =
if intval < 21 && intval > 0
then Variant3 strval intval
else Variant1
type ComplexType
= ComplexVariant (str, int -> MyType) str
myVal : ComplexType
myVal = ComplexVariant myFunc "uwu~"

View file

@ -24,9 +24,9 @@ trait Functor[a, b] on Self _ needs
.map : (a -> b), Self a -> Self b
.pure : a -> Self a
type ComplexType[a, b : Functor]
type ComplexType a b[Functor]
= Left a
| Right b
Right b
impl Functor on Option _:
.map mapper union = if union is

View file

@ -1,17 +1,50 @@
use std::{fs::File, io::Read};
use std::{fs::File, io::Read, collections::LinkedList, process::exit};
use logos::Logos;
use parser::{program::Program, Parsable, WrappedLexer};
mod token;
mod parser;
mod ir;
/// Program entry point: reads an amo source file, lexes it, parses it into a
/// `Program`, and prints every parsed declaration.
///
/// On a parse error the error location and the expected token kinds are written to
/// stderr and the process exits with status 1. I/O failures are propagated to the
/// caller through the `std::io::Result` return value.
///
/// NOTE(review): this listing appears to contain both the pre- and post-change lines
/// of a diff (two consecutive `File::open` calls, and a token-dump loop ahead of the
/// parse) — confirm which of these lines are really present in the file.
fn main() -> std::io::Result<()> {
let mut input_file = File::open("sample.amo")?;
// Shadows the handle opened on the previous line.
let mut input_file = File::open("sample-initial.amo")?;
let mut input = String::with_capacity(4096);
input_file.read_to_string(&mut input)?;
// Dump the raw token stream, one token per line.
for tok in token::Token::lexer(&input) {
println!("{tok:?}");
}
// Lex again from the start; the parser pulls tokens through the wrapper.
let lexer = token::Token::lexer(&input);
let mut wrapped_lexer = WrappedLexer::new(lexer);
let program = match Program::parse(&mut wrapped_lexer) {
Err(e) => {
let location = e.location;
eprintln!("Parse error at {location:?}!");
eprintln!("Expected one of:");
for token in e.expected {
eprintln!(" - {token:?}");
}
// A failed parse is reported through the exit status.
exit(1);
}
Ok(p) => p
};
println!("Parse successful!!");
for decl in program.0 {
println!("{decl:?}\n");
}
Ok(())
}
/// Appends `e` to the end of `v` and returns the vector.
///
/// Takes the vector by value so it can be used in expression position, e.g. when
/// threading an accumulator through a chain of calls.
pub fn cons<T>(mut v: Vec<T>, e: T) -> Vec<T> {
    v.extend(std::iter::once(e));
    v
}
/// Appends `e` to the back of the linked list `v` and returns the list.
pub fn cons_ll<T>(mut v: LinkedList<T>, e: T) -> LinkedList<T> {
    v.extend(std::iter::once(e));
    v
}
/// Concatenates two linked lists, returning `a` followed by every element of `b`.
pub fn join<T>(mut a: LinkedList<T>, b: LinkedList<T>) -> LinkedList<T> {
    a.extend(b);
    a
}

112
src/parser/declaration.rs Normal file
View file

@ -0,0 +1,112 @@
use std::mem::{Discriminant, discriminant};
use crate::token::Token;
use super::{Parsable, WrappedLexer, ParseError, absorb_token_or_error, types::{TightType, FullType}, expr::Expr};
/// A single top-level declaration of a program.
#[derive(Debug)]
pub enum Declaration {
    /// A `type` declaration: a name and one or more variants separated by `|`.
    Type {
        name: String,
        first_variants: Variant,
        other_variants: Vec<Variant>,
    },
    /*Struct {
    name: String,
    typeargs: Vec<TypeArg>,
    first_fields: Field,
    other_fields: Vec<Field>,
    },
    Trait {
    name: String,
    typeargs: Vec<TypeArg>,
    on: Option<CompositeType>,
    },
    Impl(ImplDecl),*/
    /// A value or function: a type-annotation line followed by a definition line.
    ///
    /// `name` is the symbol on the annotation line and `name2` the symbol on the
    /// definition line; this parser does not compare the two.
    Symbol {
        name: String,
        type_: FullType,
        name2: String,
        args: Vec<String>,
        value: Expr,
    },
    /// Produced when the declaration position holds `EOF` or another
    /// `DeclarationStart` instead of an actual declaration.
    Empty,
}

impl Parsable for Declaration {
    /// A declaration starts with a symbol or the `type` keyword and is never empty.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let starters = vec![
            discriminant(&Token::Symbol("".to_string())),
            discriminant(&Token::Type),
        ];
        (starters, false)
    }

    /// Parses one declaration, dispatching on the first token.
    ///
    /// Returns a [`ParseError`] located at the first token when that token cannot
    /// start a declaration; errors from nested parsers are propagated unchanged.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        // Remember where the leading token sits before it is consumed.
        let location = l.span();
        match l.monch() {
            // Type Symbol Assign <variant_decl> { VBar <variant_decl> }
            Token::Type => {
                let name = String::parse(l)?;
                absorb_token_or_error(l, discriminant(&Token::Assign))?;
                let first_variants = Variant::parse(l)?;

                let mut other_variants = Vec::new();
                loop {
                    if *l.curtok() != Token::VBar {
                        break;
                    }
                    l.monch();
                    other_variants.push(Variant::parse(l)?);
                }

                Ok(Self::Type { name, first_variants, other_variants })
            }
            // Symbol Colon <full_type> DeclarationStart Symbol { Symbol } Assign <expr>
            Token::Symbol(name) => {
                absorb_token_or_error(l, discriminant(&Token::Colon))?;
                let type_ = FullType::parse(l)?;
                absorb_token_or_error(l, discriminant(&Token::DeclarationStart))?;
                let name2 = String::parse(l)?;
                let args: Vec<String> = Vec::parse(l)?;
                absorb_token_or_error(l, discriminant(&Token::Assign))?;
                let value = Expr::parse(l)?;

                Ok(Self::Symbol { name, type_, name2, args, value })
            }
            Token::EOF | Token::DeclarationStart => Ok(Self::Empty),
            _ => Err(ParseError {
                expected: Self::expected().0,
                location,
            }),
        }
    }
}
/// One variant of a `type` declaration: a name followed by its element types.
#[derive(Debug)]
pub struct Variant {
    name: String,
    elements: Vec<TightType>,
}

impl Parsable for Variant {
    /// A variant always begins with its name, so it can never be empty.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let symbol = discriminant(&Token::Symbol(String::new()));
        (vec![symbol], false)
    }

    /// Parses `Symbol { <tight_type> }`.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let name = String::parse(l)?;
        let elements = Vec::parse(l)?;
        Ok(Variant { name, elements })
    }
}

179
src/parser/expr.rs Normal file
View file

@ -0,0 +1,179 @@
// <expr> ::= <tightexpr> { <tightexpr> }
// | If <expr> <ifblock>
// | <let> { <let> } In <expr>
// | <infix_expr>
//
// <tightexpr> ::= Literal
// | Symbol
// | OpenParen <expr> CloseParen
//
// <let> ::= Let Symbol Assign <expr>
//
// <ifblock> ::= Is <case> { Comma <case> } [ Comma ]
// | Then <expr> Else <expr>
//
// <case> ::= <binding_pattern> Aro <expr>
use std::mem::{Discriminant, discriminant};
use crate::token::{Literal, Token};
use super::{Parsable, pattern::BindingPattern, infix::Rank1Exp, ParseError, WrappedLexer, absorb_token_or_error, parse_delineated_vec};
/// A full expression.
#[derive(Debug)]
pub enum Expr {
    /// `if <expr>` followed by either an `is` case list or a `then`/`else` pair.
    If(Box<Expr>, Box<IfBlock>),
    /// One or more `let` bindings (the first, then the rest) and the `in` body.
    Let(Box<LetStmt>, Vec<LetStmt>, Box<Expr>),
    /// An operator expression, entered at the lowest-precedence rank.
    Infix(Rank1Exp),
}

impl Parsable for Expr {
    /// Everything that can start an infix expression, plus `let` and `if`.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let (mut starters, _) = Rank1Exp::expected();
        starters.push(discriminant(&Token::Let));
        starters.push(discriminant(&Token::If));
        (starters, false)
    }

    /// Parses an expression, choosing the alternative from the lookahead token.
    ///
    /// Infix expressions are tried first; `let` and `if` are only considered when
    /// the lookahead cannot start an infix expression.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let lookahead = discriminant(l.curtok());

        if Rank1Exp::expected().0.contains(&lookahead) {
            return Rank1Exp::parse(l).map(Expr::Infix);
        }

        if lookahead == discriminant(&Token::Let) {
            // `LetStmt::parse` consumes the `let` keyword itself.
            let first_binding = LetStmt::parse(l)?;
            let more_bindings: Vec<LetStmt> = Vec::parse(l)?;
            absorb_token_or_error(l, discriminant(&Token::In))?;
            let body = Expr::parse(l)?;
            Ok(Expr::Let(Box::new(first_binding), more_bindings, Box::new(body)))
        } else if lookahead == discriminant(&Token::If) {
            l.monch();
            let condition = Expr::parse(l)?;
            let branches = IfBlock::parse(l)?;
            Ok(Expr::If(Box::new(condition), Box::new(branches)))
        } else {
            Err(ParseError {
                location: l.span(),
                expected: Self::expected().0,
            })
        }
    }
}
/// An expression that needs no surrounding parentheses to be used as an operand.
#[derive(Debug)]
pub enum TightExpr {
    /// A literal value.
    Literal(Literal),
    /// A parenthesised expression.
    Grouped(Expr),
    /// A bare symbol.
    Symbol(String),
}

impl Parsable for TightExpr {
    /// A tight expression starts with a literal, `(`, or a symbol.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let starters = vec![
            discriminant(&Token::Literal(Literal::Int(0))),
            discriminant(&Token::OpenParen),
            discriminant(&Token::Symbol(String::new())),
        ];
        (starters, false)
    }

    /// Consumes one token and builds the matching tight expression; for `(` the
    /// enclosed expression and the closing `)` are consumed as well.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        match l.monch() {
            Token::Symbol(name) => Ok(TightExpr::Symbol(name)),
            Token::Literal(value) => Ok(TightExpr::Literal(value)),
            Token::OpenParen => {
                let inner = Expr::parse(l)?;
                absorb_token_or_error(l, discriminant(&Token::CloseParen))?;
                Ok(TightExpr::Grouped(inner))
            }
            _ => Err(ParseError {
                location,
                expected: Self::expected().0,
            }),
        }
    }
}
/// The part of an `if` expression that follows the condition.
#[derive(Debug)]
pub enum IfBlock {
    /// `is <case> { , <case> } [ , ]` — the first case and any further cases.
    IfIs(Case, Vec<Case>),
    /// `then <expr> else <expr>` — the positive and the negative branch.
    IfThen(Expr, Expr),
}

impl Parsable for IfBlock {
    /// An if-block starts with `is` or `then` and is never empty.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        (
            vec![
                discriminant(&Token::Is),
                discriminant(&Token::Then),
            ],
            false,
        )
    }

    /// Parses either an `is` case list or a `then`/`else` pair.
    ///
    /// Returns a [`ParseError`] located at the first token when it is neither `is`
    /// nor `then`; errors from nested parsers are propagated unchanged.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let span = l.span();
        match l.monch() {
            // Is <case> { Comma <case> } [ Comma ]
            Token::Is => {
                let initial_case = Case::parse(l)?;
                let other_cases = if matches!(l.curtok(), Token::Comma) {
                    // The comma separating the first case from the rest must be
                    // consumed here: `parse_delineated_vec` expects to start on an
                    // element, so leaving the comma in place would make it return
                    // an empty list and strand every remaining case.
                    l.monch();
                    // Also accepts a trailing comma after the final case.
                    parse_delineated_vec(l, discriminant(&Token::Comma))?
                } else {
                    Vec::new()
                };
                Ok(IfBlock::IfIs(initial_case, other_cases))
            }
            // Then <expr> Else <expr>
            Token::Then => {
                let positive = Expr::parse(l)?;
                absorb_token_or_error(l, discriminant(&Token::Else))?;
                let negative = Expr::parse(l)?;
                Ok(IfBlock::IfThen(positive, negative))
            }
            _ => Err(ParseError {
                location: span,
                expected: Self::expected().0,
            }),
        }
    }
}
/// A single `let <symbol> = <expr>` binding: the bound name and its value.
#[derive(Debug)]
pub struct LetStmt(String, Expr);

impl Parsable for LetStmt {
    /// A binding always starts with the `let` keyword.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let keyword = discriminant(&Token::Let);
        (vec![keyword], false)
    }

    /// Parses `Let Symbol Assign <expr>`.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        absorb_token_or_error(l, discriminant(&Token::Let))?;
        let name = String::parse(l)?;
        absorb_token_or_error(l, discriminant(&Token::Assign))?;
        Expr::parse(l).map(|bound| LetStmt(name, bound))
    }
}
/// One arm of an `if … is` block: a binding pattern and the expression it selects.
#[derive(Debug)]
pub struct Case(BindingPattern, Expr);

impl Parsable for Case {
    /// A case starts wherever a binding pattern can start.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        BindingPattern::expected()
    }

    /// Parses `<binding_pattern> Aro <expr>`.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let matcher = BindingPattern::parse(l)?;
        absorb_token_or_error(l, discriminant(&Token::Aro))?;
        Expr::parse(l).map(|body| Case(matcher, body))
    }
}

11
src/parser/infix.rs Normal file
View file

@ -0,0 +1,11 @@
use crate::token::{InfixRank1, InfixRank2, InfixRank3, InfixRank5, InfixRank6, InfixRank7};
use super::expr::TightExpr;
// Operator-precedence ladder. Each rank is a first operand followed by a list of
// further operands of the next rank; rank 1 is applied last and rank 7 first.

/// Rank 1: operands joined by rank-1 operators (logical or).
pub type Rank1Exp = (Rank2Exp, Vec<(InfixRank1, Rank2Exp)>);
/// Rank 2: operands joined by rank-2 operators (logical and).
pub type Rank2Exp = (Rank3Exp, Vec<(InfixRank2, Rank3Exp)>);
/// Rank 3: operands joined by rank-3 operators (comparisons).
pub type Rank3Exp = (Rank4Exp, Vec<(InfixRank3, Rank4Exp)>);
/// Rank 4: operands placed side by side with no operator between them
/// (presumably function application — confirm against the language design).
pub type Rank4Exp = (Rank5Exp, Vec< Rank5Exp>);
/// Rank 5: operands joined by rank-5 operators (range).
pub type Rank5Exp = (Rank6Exp, Vec<(InfixRank5, Rank6Exp)>);
/// Rank 6: operands joined by rank-6 operators (addition and subtraction).
pub type Rank6Exp = (Rank7Exp, Vec<(InfixRank6, Rank7Exp)>);
/// Rank 7: tight expressions joined by rank-7 operators (multiplication, division,
/// modulo). The first operand is boxed.
pub type Rank7Exp = (Box<TightExpr>, Vec<(InfixRank7, TightExpr)>);

271
src/parser/mod.rs Normal file
View file

@ -0,0 +1,271 @@
//! Tools to convert a stream of tokens into a parse tree
//!
//! Parsing is the stage that follows lexing, wherein a linear stream of tokens is
//! converted into a tree structure that reflects the syntax of the language. This is
//! also where syntax errors are caught.
//!
//! amo uses a recursive-descent parser: every syntax-tree node implements
//! [`Parsable`], whose [`parse()`](Parsable::parse) pulls tokens from a
//! [`WrappedLexer`] that keeps one token of lookahead.
//!
//! The root of the parser is typically [`crate::parser::program`].
use std::{ops::Range, mem::{Discriminant, self}};
use logos::Lexer;
use crate::{token::{Token, InfixRank1, InfixRank7, InfixRank6, InfixRank5, InfixRank3, InfixRank2}, cons};
use std::mem::discriminant;
pub mod program;
pub mod declaration;
pub mod types;
pub mod expr;
pub mod pattern;
pub mod infix;
/// A syntax-tree node that can be built from the token stream.
pub trait Parsable: Sized {
    /// Describes how this node may begin.
    ///
    /// Returns the kinds of token that can start the node, and a flag that is
    /// `true` when the node may also consume no tokens at all.
    fn expected() -> (Vec<Discriminant<Token>>, bool);

    /// Parses one node, consuming tokens from `l`.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError>;

    /// Reports whether a node of this kind can be parsed when `l` is the next token.
    ///
    /// Nodes that may be zero-width match every token.
    fn matches(l: &Token) -> bool {
        match Self::expected() {
            (_, true) => true,
            (starters, false) => starters.contains(&discriminant(l)),
        }
    }
}
/// Information about a parsing error that happened somewhere
#[derive(Debug)]
pub struct ParseError {
/// The span of the token that the parse error occurred on
pub location: Range<usize>,
/// The kinds of token that would have been accepted at that position
pub expected: Vec<Discriminant<Token>>,
}
/// A lexer together with one token of lookahead.
///
/// The first field is the underlying lexer; the second is the token that has been
/// read from it but not yet consumed. Once the lexer runs dry the lookahead is
/// [`Token::EOF`].
pub struct WrappedLexer<'a>(Lexer<'a, Token>, Token);

impl<'a> WrappedLexer<'a> {
    /// Wraps `l`, immediately reading its first token as the lookahead.
    pub fn new(mut l: Lexer<'a, Token>) -> WrappedLexer {
        let first = match l.next() {
            Some(token) => token,
            None => Token::EOF,
        };
        WrappedLexer(l, first)
    }

    /// Borrows the lookahead token without consuming it.
    pub fn curtok(&self) -> &Token {
        let WrappedLexer(_, lookahead) = self;
        lookahead
    }

    /// Consumes and returns the lookahead token, reading the next one in its place.
    pub fn monch(&mut self) -> Token {
        let upcoming = self.0.next().unwrap_or(Token::EOF);
        mem::replace(&mut self.1, upcoming)
    }

    /// Returns the underlying lexer's current span.
    ///
    /// Because the wrapper always reads one token ahead, this is presumably the
    /// span of the lookahead token — confirm against the lexer's documentation.
    pub fn span(&self) -> Range<usize> {
        self.0.span()
    }
}
/// A boxed node parses exactly like the node it wraps.
impl<P: Parsable> Parsable for Box<P> {
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        P::expected()
    }

    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let inner = P::parse(l)?;
        Ok(Box::new(inner))
    }
}
/// An optional node: parsed when the lookahead can start it, otherwise `None`.
impl<P: Parsable> Parsable for Option<P> {
    /// Same starting tokens as `P`, but always allowed to be empty.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let (starters, _) = P::expected();
        (starters, true)
    }

    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        if !P::matches(l.curtok()) {
            return Ok(None);
        }
        Ok(Some(P::parse(l)?))
    }
}
/// A pair parses as `A` immediately followed by `B`.
impl<A: Parsable, B: Parsable> Parsable for (A, B) {
    /// The pair starts like `A`; when `A` may be empty it can also start like `B`,
    /// and it may be empty only if `B` may be empty too.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let (mut starters, first_may_be_empty) = A::expected();
        if !first_may_be_empty {
            return (starters, false);
        }
        let (second_starters, second_may_be_empty) = B::expected();
        starters.extend(second_starters);
        (starters, second_may_be_empty)
    }

    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let first = A::parse(l)?;
        B::parse(l).map(|second| (first, second))
    }
}
/// A `String` parses as a single symbol token, yielding the symbol's text.
impl Parsable for String {
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let symbol = discriminant(&Token::Symbol("".to_owned()));
        (vec![symbol], false)
    }

    /// Consumes one token; fails with an error at that token unless it is a symbol.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        if let Token::Symbol(text) = l.monch() {
            Ok(text)
        } else {
            Err(ParseError {
                location,
                expected: Self::expected().0,
            })
        }
    }
}
/// A `Vec` parses as zero or more consecutive `P` nodes.
impl<P: Parsable> Parsable for Vec<P> {
    /// Same starting tokens as `P`, but always allowed to be empty.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let (starters, _) = P::expected();
        (starters, true)
    }

    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        vec_parse_inner(l, Vec::new())
    }
}

/// Keeps parsing `P` nodes onto the end of `collected` for as long as the lookahead
/// token can start one, then returns everything gathered.
fn vec_parse_inner<P: Parsable>(l: &mut WrappedLexer, collected: Vec<P>) -> Result<Vec<P>,ParseError> {
    let mut items = collected;
    while P::matches(l.curtok()) {
        items.push(P::parse(l)?);
    }
    Ok(items)
}
/// Parses zero or more `P` nodes separated by the `delineator` token.
///
/// Parsing stops at the first position where no element can start, so a trailing
/// delineator after the last element is accepted and consumed.
pub fn parse_delineated_vec<P: Parsable>(l: &mut WrappedLexer, delineator: Discriminant<Token>) -> Result<Vec<P>, ParseError> {
    parse_delineated_vec_inner(l, delineator, Vec::new())
}

/// Worker for [`parse_delineated_vec`]: extends `acc` with elements separated by `d`.
fn parse_delineated_vec_inner<P: Parsable>(l: &mut WrappedLexer, d: Discriminant<Token>, acc: Vec<P>) -> Result<Vec<P>, ParseError> {
    let mut gathered = acc;
    while P::matches(l.curtok()) {
        gathered = cons(gathered, P::parse(l)?);
        // Without a separator after this element the list is finished.
        if discriminant(l.curtok()) != d {
            break;
        }
        l.monch();
    }
    Ok(gathered)
}
/// Consumes the lookahead token if it is of kind `t`, returning it.
///
/// When the lookahead is of any other kind nothing is consumed and a
/// [`ParseError`] naming `t` as the only expected token is returned.
pub fn absorb_token_or_error(l: &mut WrappedLexer, t: Discriminant<Token>) -> Result<Token, ParseError> {
    if discriminant(l.curtok()) != t {
        return Err(ParseError {
            location: l.span(),
            expected: vec![t],
        });
    }
    Ok(l.monch())
}
impl Parsable for InfixRank1 {
fn expected() -> (Vec<Discriminant<Token>>, bool) {
(vec![discriminant(&Token::R1Infix(InfixRank1::LOr))], false)
}
fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
let span = l.span();
match l.monch() {
Token::R1Infix(o) => Ok(o),
_ => Err(ParseError {
location: span,
expected: Self::expected().0
}),
}
}
}
impl Parsable for InfixRank2 {
fn expected() -> (Vec<Discriminant<Token>>, bool) {
(vec![discriminant(&Token::R2Infix(InfixRank2::LAnd))], false)
}
fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
let span = l.span();
match l.monch() {
Token::R2Infix(o) => Ok(o),
_ => Err(ParseError {
location: span,
expected: Self::expected().0
}),
}
}
}
impl Parsable for InfixRank3 {
fn expected() -> (Vec<Discriminant<Token>>, bool) {
(vec![discriminant(&Token::R3Infix(InfixRank3::Eq))], false)
}
fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
let span = l.span();
match l.monch() {
Token::R3Infix(o) => Ok(o),
_ => Err(ParseError {
location: span,
expected: Self::expected().0
}),
}
}
}
impl Parsable for InfixRank5 {
fn expected() -> (Vec<Discriminant<Token>>, bool) {
(vec![discriminant(&Token::R5Infix(InfixRank5::Range))], false)
}
fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
let span = l.span();
match l.monch() {
Token::R5Infix(o) => Ok(o),
_ => Err(ParseError {
location: span,
expected: Self::expected().0
}),
}
}
}
impl Parsable for InfixRank6 {
fn expected() -> (Vec<Discriminant<Token>>, bool) {
(vec![discriminant(&Token::R6Infix(InfixRank6::Add))], false)
}
fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
let span = l.span();
match l.monch() {
Token::R6Infix(o) => Ok(o),
_ => Err(ParseError {
location: span,
expected: Self::expected().0
}),
}
}
}
impl Parsable for InfixRank7 {
fn expected() -> (Vec<Discriminant<Token>>, bool) {
(vec![discriminant(&Token::R7Infix(InfixRank7::Mul))], false)
}
fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
let span = l.span();
match l.monch() {
Token::R7Infix(o) => Ok(o),
_ => Err(ParseError {
location: span,
expected: Self::expected().0
}),
}
}
}

66
src/parser/pattern.rs Normal file
View file

@ -0,0 +1,66 @@
// <binding_pattern> ::= Symbol { <tight_binding_pattern> }
// <tight_binding_pattern> ::= Literal
// | Symbol
// | OpenParen <binding_pattern> CloseParen
use std::mem::{discriminant, Discriminant};
use crate::token::{Literal, Token};
use super::{Parsable, ParseError, WrappedLexer, absorb_token_or_error};
/// A binding pattern that can appear as an argument of another pattern without
/// needing parentheses of its own.
#[derive(Debug)]
pub enum TightBindingPattern {
    /// A literal value.
    Literal(Literal),
    /// A bare symbol.
    Symbol(String),
    /// A parenthesised binding pattern.
    Group(BindingPattern),
}

impl Parsable for TightBindingPattern {
    /// A tight pattern starts with a literal, a symbol, or `(`.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        (
            vec![
                discriminant(&Token::Literal(Literal::Int(0))),
                discriminant(&Token::Symbol(String::new())),
                discriminant(&Token::OpenParen),
            ],
            false,
        )
    }

    /// Consumes one tight pattern from the token stream.
    ///
    /// The leading token is always consumed. Inspecting it with `curtok()` alone
    /// would leave it in place, so a caller that repeats this parser (such as
    /// `Vec::parse`) would see the same token forever, and the `(` of a group would
    /// be handed to `BindingPattern::parse`, which expects a symbol.
    ///
    /// Returns a [`ParseError`] located at the leading token when it cannot start a
    /// tight pattern; errors from nested parsers are propagated unchanged.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        match l.monch() {
            Token::Literal(literal) => Ok(TightBindingPattern::Literal(literal)),
            Token::Symbol(name) => Ok(TightBindingPattern::Symbol(name)),
            // OpenParen <binding_pattern> CloseParen
            Token::OpenParen => {
                let patt = BindingPattern::parse(l)?;
                absorb_token_or_error(l, discriminant(&Token::CloseParen))?;
                Ok(TightBindingPattern::Group(patt))
            }
            _ => Err(ParseError {
                expected: Self::expected().0,
                location,
            }),
        }
    }
}
/// A pattern in an `if … is` case: a leading symbol followed by its tight
/// sub-patterns.
#[derive(Debug)]
pub struct BindingPattern(String, Vec<TightBindingPattern>);

impl Parsable for BindingPattern {
    /// A binding pattern always starts with a symbol.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let symbol = discriminant(&Token::Symbol(String::new()));
        (vec![symbol], false)
    }

    /// Parses `Symbol { <tight_binding_pattern> }`.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let head = String::parse(l)?;
        Vec::parse(l).map(|parts| Self(head, parts))
    }
}

34
src/parser/program.rs Normal file
View file

@ -0,0 +1,34 @@
use std::mem::{discriminant, Discriminant};
use crate::{token::Token, cons};
use super::{ParseError, Parsable, WrappedLexer, declaration::Declaration};
/// A whole parsed source file: its declarations in source order.
#[derive(Debug)]
pub struct Program(pub Vec<Declaration>);

impl Parsable for Program {
    /// A program starts like a declaration or with a `DeclarationStart` token, and
    /// may be empty.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let (mut starters, _) = Declaration::expected();
        starters.push(discriminant(&Token::DeclarationStart));
        (starters, true)
    }

    /// Parses declarations until a token that cannot start one is reached.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let declarations = parse_program_inner(l, Vec::with_capacity(10))?;
        Ok(Program(declarations))
    }
}
/// Collects declarations onto `acc` until the lookahead can no longer start one.
///
/// `DeclarationStart` tokens between declarations are skipped. When any other
/// non-declaration token is reached, a diagnostic naming it is written to stderr
/// and the declarations gathered so far are returned as a success.
fn parse_program_inner(l: &mut WrappedLexer, acc: Vec<Declaration>) -> Result<Vec<Declaration>, ParseError> {
    let mut declarations = acc;
    loop {
        if *l.curtok() == Token::DeclarationStart {
            l.monch();
        } else if Declaration::matches(l.curtok()) {
            declarations = cons(declarations, Declaration::parse(l)?);
        } else {
            eprintln!("Looking for a declaration but got {:?} at {:?}", *l.curtok(), l.span());
            return Ok(declarations);
        }
    }
}

View file

@ -0,0 +1,72 @@
use std::{collections::LinkedList, ops::Range};
use crate::token::Token;
use super::{Tree, ParseError, PartialTree, ParseErrorDetails};
/// A program together with parse errors: a list of declarations and a list of errors.
///
/// NOTE(review): this file relies on `Tree`, `PartialTree` and `ParseErrorDetails`
/// from the parent module and on `Declaration` / `PartialDeclaration`, none of which
/// are imported or defined in the code visible here — confirm whether this file is
/// still compiled.
#[derive(Default, Debug)]
pub struct Program(Vec<Declaration>, LinkedList<ParseError>);
/// An in-progress program parse: the program so far, plus the declaration currently
/// being parsed (if any).
#[derive(Default, Debug)]
pub struct PartialProgram(Program, Option<PartialDeclaration>);
/// Push-style parsing step for a whole program.
///
/// `accept` is handed one token `t` and its span `s`. In this implementation every
/// visible non-recursive path returns `Err` carrying the updated partial state.
impl PartialTree for PartialProgram {
type Final = Program;
fn accept(self, t: Token, s: Range<usize>) -> Result<(Self::Final, Token), Self> {
match self.1 {
Some(partial) =>
// A partial parse is already in progress
match partial.accept(t, s) {
Ok((decl, tok)) => {
// The parse finished, keep propagating the token
// NOTE(review): field 0.0 is declared as a `Vec`, and `s` was already
// passed by value above — confirm this compiles as written.
self.0.0.push_front(decl);
PartialProgram(self.0, None).accept(tok, s)
}
Err(partial) =>
// The parse continues
Err(PartialProgram(self.0, partial)),
}
None =>
// We just finished parsing a declaration, and are now looking for a
// DeclarationStart
match t {
Token::DeclarationStart =>
// We got what we were looking for
Err(PartialProgram(self.0, Some(Default::default()))),
Token::Type |
Token::Struct |
Token::Trait |
Token::Impl |
Token::Symbol(_) => {
// Seems like the user just forgot to start a declaration
// Record the error, then retry the same token as the start of a declaration.
self.0.1.push_front(ParseError(
s, ParseErrorDetails::MissingDeclarationStart
));
PartialProgram(self.0, Some(Default::default())).accept(t, s)
}
_ => {
// No idea what's happening here, just throw it away
self.0.1.push_front(ParseError(
s, ParseErrorDetails::FoundGarbage(0)
));
Err(self)
}
}
}
}
}
/// Error reporting for a finished program.
impl Tree for Program {
// Gathers the errors reported by each declaration. Only the declaration list
// (field 0) is visited here; the program's own error list (field 1) is not.
// NOTE(review): each `d.get_errors()` presumably yields a list itself — confirm
// the `collect()` produces the intended flat list.
fn get_errors(&self) -> LinkedList<&ParseError> {
self.0.iter()
.map(|d| d.get_errors())
.collect()
}
}
/// Error reporting for an in-progress program: delegates to the program parsed so far.
impl Tree for PartialProgram {
fn get_errors(&self) -> LinkedList<&ParseError> {
self.0.get_errors()
}
}

85
src/parser/types.rs Normal file
View file

@ -0,0 +1,85 @@
// <full_type> ::= <tight_type> [ Comma [ <additional_type> ] [ Aro <full_type> ] ]
// <additional_type> ::= <tight_type> [ Comma [ <additional_type> ] ]
// <tight_type> ::= OpenParen <full_type> CloseParen
// | Symbol [ OpenSquareBracket <domain> CloseSquareBracket ]
// <domain> ::= <expr> [ Comma [ <domain> ] ]
use std::mem::{Discriminant, discriminant};
use crate::token::Token;
use super::{Parsable, WrappedLexer, parse_delineated_vec, absorb_token_or_error, ParseError, expr::Expr};
/// A type that can be used as an element of a larger type without parentheses.
#[derive(Debug)]
pub enum TightType {
    /// A parenthesised full type.
    Grouped(Box<FullType>),
    /// A named type, optionally followed by a bracketed, comma-separated list of
    /// domain expressions.
    Simple {
        name: String,
        domains: Vec<Expr>,
    },
}

impl Parsable for TightType {
    /// A tight type starts with `(` or a symbol and is never empty.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        let starters = vec![
            discriminant(&Token::OpenParen),
            discriminant(&Token::Symbol(String::new())),
        ];
        (starters, false)
    }

    /// Consumes one tight type, dispatching on its first token.
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let location = l.span();
        match l.monch() {
            // OpenParen <full_type> CloseParen
            Token::OpenParen => {
                let inner = Box::new(FullType::parse(l)?);
                absorb_token_or_error(l, discriminant(&Token::CloseParen))?;
                Ok(TightType::Grouped(inner))
            }
            // Symbol [ OpenSquareBracket <domain> CloseSquareBracket ]
            Token::Symbol(name) => {
                let mut domains: Vec<Expr> = Vec::new();
                if *l.curtok() == Token::OpenSquareBracket {
                    l.monch();
                    domains = parse_delineated_vec(l, discriminant(&Token::Comma))?;
                    absorb_token_or_error(l, discriminant(&Token::CloseSquareBracket))?;
                }
                Ok(TightType::Simple { name, domains })
            }
            _ => Err(ParseError {
                location,
                expected: Self::expected().0,
            }),
        }
    }
}
/// A complete type annotation.
#[derive(Debug)]
pub enum FullType {
    /// A single tight type.
    Simple(TightType),
    /// A function type: the argument types and the result type after `->`.
    Function(Vec<TightType>, Box<FullType>),
}

impl Parsable for FullType {
    /// A full type starts wherever a tight type can start.
    fn expected() -> (Vec<Discriminant<Token>>, bool) {
        TightType::expected()
    }

    /// Parses a full type.
    ///
    /// A lone tight type yields [`FullType::Simple`]. When the first tight type is
    /// followed by a comma, the remaining comma-separated argument types, a
    /// mandatory `->`, and the result type are read into [`FullType::Function`].
    // <full_type> ::= <tight_type> [ Comma [ <additional_type> ] [ Aro <full_type> ] ]
    fn parse(l: &mut WrappedLexer) -> Result<Self, ParseError> {
        let head = TightType::parse(l)?;
        if !matches!(l.curtok(), Token::Comma) {
            return Ok(FullType::Simple(head));
        }

        l.monch();
        let tail: Vec<TightType> = parse_delineated_vec(l, discriminant(&Token::Comma))?;
        absorb_token_or_error(l, discriminant(&Token::Aro))?;
        let result = Box::new(FullType::parse(l)?);

        let mut args = vec![head];
        args.extend(tail);
        Ok(FullType::Function(args, result))
    }
}

View file

@ -22,153 +22,176 @@ pub enum Token {
#[regex(r"[ \t]+", logos::skip)]
#[regex(r"\n[ \t]+", logos::skip)]
#[regex(r"//.+", logos::skip)]
#[regex(r"/\*[.\n]+\*/", logos::skip)]
Error,
#[regex(r"/\*([^*]*(\*[^/])?)+\*/", logos::skip)]
Error, //d00
/// The `type` keyword
///
/// Used to denote that a new type (called an enum in some languages) is being
/// declared
#[token("type")]
Type,
Type, //d01
/// The `struct` keyword
///
/// Used to denote the declaration of a kind of struct
#[token("struct")]
Struct,
Struct, //d02
/// The `trait` keyword
///
/// Denotes the declaration of a new trait
#[token("trait")]
Trait,
Trait, //d03
/// The `needs` keyword
///
/// Used as part of a trait declaration to denote methods that will be required for a
/// trait
#[token("needs")]
Needs,
Needs, //d04
/// The `if` keyword
///
/// Used to begin an If-Then-Else statement or an If-Is statement
#[token("if")]
If,
If, //d05
/// The `is` keyword
///
/// Used as part of an If-Is statement to indicate the start of the case listings
#[token("is")]
Is,
Is, //d06
/// The `then` keyword
///
/// Indicates the start of the code block for the positive section of an If-Then-Else
/// statement
#[token("then")]
Then,
Then, //d07
/// The `else` keyword
///
/// Denotes the end of the positive section of an If-Then-Else block, and the beginning
/// of the negative section
#[token("else")]
Else,
Else, //d08
/// the `impl` keyword
///
/// Used to denote the start of a trait implementation
#[token("impl")]
Impl,
Impl, //d09
/// the `on` keyword
///
/// Used in trait implementations to separate the trait being implemented and the type
/// it's being implemented on.
#[token("on")]
On,
On, //d10
/// the `let` keyword
///
/// Allows binding a value to an immutable variable that can be used multiple times
#[token("let")]
Let,
Let, //d11
/// the `in` keyword
///
/// Used to seperate a series of `let` bindings from the expression they're being used
/// in.
#[token("in")]
In,
In, //d12
/// An `->` arrow
///
/// Used as part of function type annotations as well as in the cases of If-Is blocks
#[token("->")]
Aro,
Aro, //d13
/// An `=` assignment operator
///
/// Used to separate the left & right hand sides of an assignment operation
#[token("=")]
Assign,
Assign, //d14
/// The `|` keyword (or punctuation? idk what it's called)
///
/// Used in deliniating variants of a type
#[token("|")]
VBar,
VBar, //d15
/// The `_` symbol
///
/// Generally used as a placeholder or standin for another type
#[token("_")]
Placeholder,
Placeholder, //d16
/// The `:` symbol
///
/// Used as a seperator in various parts of the language
#[token(":")]
Colon,
Colon, //d17
/// Any infix binop (binary operator)
/// A rank 1 (applied last) infix binop (binary operator)
///
/// E.g. +, -, >, /, %, etc.
/// i.e. Logical Or
#[token("||", |_| InfixRank1::LOr)]
R1Infix(InfixRank1), //d18
/// A rank 2 infix binop (binary operator)
///
/// These are operators that take two operands, one on the left, and one on the right,
/// and produce a single value. I don't think there are any two character
#[token("&&", |_| InfixOp::LAnd)]
#[token("||", |_| InfixOp::LOr)]
#[token("==", |_| InfixOp::Eq)]
#[token("!=", |_| InfixOp::NEq)]
#[token("*", |_| InfixOp::Mult)]
#[token("%", |_| InfixOp::Mod)]
#[token("/", |_| InfixOp::Div)]
#[token("+", |_| InfixOp::Add)]
#[token("-", |_| InfixOp::Sub)]
#[token("<", |_| InfixOp::Less)]
#[token(">", |_| InfixOp::Greater)]
Infix(InfixOp),
/// i.e. Logical And
#[token("&&", |_| InfixRank2::LAnd)]
R2Infix(InfixRank2), //d19
/// A rank 3 infix binop (binary operator)
///
/// i.e. Comparison operators like == and <
#[token("==", |_| InfixRank3::Eq)]
#[token("!=", |_| InfixRank3::NEq)]
#[token("<", |_| InfixRank3::LessThan)]
#[token(">", |_| InfixRank3::GreaterThan)]
R3Infix(InfixRank3), //d20
/// A rank 5 infix binop (binary operator)
///
/// i.e. Range
#[token("..", |_| InfixRank5::Range)]
R5Infix(InfixRank5), //d21
/// A rank 6 infix binop (binary operator)
///
/// i.e. Addition & Subtraction
#[token("+", |_| InfixRank6::Add)]
#[token("-", |_| InfixRank6::Sub)]
R6Infix(InfixRank6), //d22
/// A rank 7 (applied first) infix binop (binary operator)
///
/// i.e. Multiplication, Division, and Modulo
#[token("*", |_| InfixRank7::Mul)]
#[token("/", |_| InfixRank7::Div)]
#[token("%", |_| InfixRank7::Mod)]
R7Infix(InfixRank7), //d23
/// Some literal (a constant value represented textually)
///
/// For example, 100 is an integer literal, "hewwo" is a string literal, and `true` is
/// a boolean literal.
#[regex("\"(?:.+(?:\\\\\")?)+\"", |lex| Literal::from_string_match(lex.slice()))]
#[regex("\"(?:[^\"]*(?:\\\\\")?)+\"", |lex| Literal::from_string_match(lex.slice()))]
#[regex(r"\d+", |lex| Literal::from_int_match(lex.slice()))]
Literal(Literal),
Literal(Literal), //d24
/// Some symbol, usually a variable or a type
#[regex(r"[a-zA-Z][a-zA-Z\d]*", |lex| lex.slice().to_string(), priority = 0)]
Symbol(String),
Symbol(String), //d25
/// An opening `[` square bracket
///
/// Usually used in arrays and domain restrictions
#[token("[")]
OpenSquareBracket,
OpenSquareBracket, //d26
/// A closing `]` square bracket
///
@ -178,38 +201,32 @@ pub enum Token {
/// In amo, the opening and closing square brackets are both lesbians, and they're
/// dating. The closing square bracket is transgender, also.
#[token("]")]
CloseSquareBracket,
CloseSquareBracket, //d27
/// An opening `(` paren
///
/// Usually used to make explicit the order of operations
#[token("(")]
OpenParen,
OpenParen, //d28
/// A closing `)` paren
///
/// Usually used in arrays and domain restrictions, this is the counterpart to the
/// open parenthesis.
#[token(")")]
CloseParen,
/// A `..` range operator
///
/// Used to denote, well, a range between the values on the left and the right.
#[token("..")]
RangeOp,
CloseParen, //d29
/// A `.` period
///
/// For getting fields of structs
#[token(".")]
Dot,
Dot, //d30
/// A `,` comma
///
/// The age-old and timeless delineator
#[token(",")]
Comma,
Comma, //d31
/// A newline NOT followed by whitespace
///
@ -229,50 +246,34 @@ pub enum Token {
/// simply lexes to `Symbol(variable)`, `Assign`, `Symbol(value)`. This makes it easy
/// to identify declarations.
#[regex(r"\s*\n")]
DeclarationStart,
DeclarationStart, //d32
/// Denotes that the parser has reached the end of the input
///
/// This is always the last token in a stream, both in that it must be present in all
/// streams, and in that it will never be followed by any tokens.
EOF, //d33
}
#[derive(Debug, PartialEq, Eq)]
/// A specific infix operator
///
/// Used to specify the [`Token::Infix`] variant.
pub enum InfixOp {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// The rank 1 infix operators, carried by [`Token::R1Infix`]
///
/// Rank 1 operators are the ones applied last.
pub enum InfixRank1 {
/// The logical OR operator
///
/// Takes two boolean values and returns true if either is true
LOr,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// The rank 2 infix operators, carried by [`Token::R2Infix`]
pub enum InfixRank2 {
/// The logical AND operator
///
/// Takes two boolean values and returns true iff both values are true. Otherwise,
/// returns false.
LAnd,
}
/// The logical OR operator
///
/// Takes two boolean values and returns true if either is true
LOr,
/// The multiplicitive operator.
///
/// Takes two numeric values and returns their product
Mult,
/// The modulo operator.
///
/// Takes two numeric values and returns the remainder of their division
Mod,
/// The division operator.
///
/// Takes two numeric values and returns their quotient
Div,
/// The additive operator.
///
/// Takes two numeric values and returns their sum
Add,
/// The subtractive operator.
///
/// Takes two numeric values and returns their difference
Sub,
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InfixRank3 {
/// The equality operator.
///
/// Takes two values and returns true iff they are equal
@ -286,15 +287,54 @@ pub enum InfixOp {
/// The less-than operator.
///
/// Takes two numeric values and returns true iff the first is LESS than the second
Less,
LessThan,
/// The greater-than operator.
///
/// Takes two numeric values and returns true iff the first is GREATER than the second
Greater,
GreaterThan,
}
#[derive(Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// The rank 5 infix operators, carried by [`Token::R5Infix`]
pub enum InfixRank5 {
/// The range operator.
///
/// Takes two numeric values and returns a range from the first to the second
Range,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// The rank 6 infix operators, carried by [`Token::R6Infix`]
pub enum InfixRank6 {
/// The additive operator.
///
/// Takes two numeric values and returns their sum
Add,
/// The subtractive operator.
///
/// Takes two numeric values and returns their difference
Sub,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// The rank 7 infix operators, carried by [`Token::R7Infix`]
///
/// Rank 7 operators are the ones applied first.
pub enum InfixRank7 {
/// The multiplicative operator.
///
/// Takes two numeric values and returns their product
Mul,
/// The modulo operator.
///
/// Takes two numeric values and returns the remainder of their division
Mod,
/// The division operator.
///
/// Takes two numeric values and returns their quotient
Div,
}
#[derive(Clone, Debug, PartialEq, Eq)]
/// A specific type of literal, used for the [`Token::Literal`] token
pub enum Literal {
/// A string literal