From 250d40dab79cd714db1f044ae48e4479570a5c97 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Fri, 22 Apr 2022 11:49:28 -0400 Subject: [PATCH] Update the lexer to match changes to the grammar --- grammar-initial.ebnf | 4 +- sample-initial.amo | 14 ++++--- src/parser/declaration.rs | 80 +++++------------------------------- src/parser/expr.rs | 13 ++++-- src/parser/mod.rs | 1 - src/parser/types.rs | 85 --------------------------------------- src/token.rs | 29 ++++++++----- 7 files changed, 48 insertions(+), 178 deletions(-) delete mode 100644 src/parser/types.rs diff --git a/grammar-initial.ebnf b/grammar-initial.ebnf index 103b0f2..d562008 100644 --- a/grammar-initial.ebnf +++ b/grammar-initial.ebnf @@ -25,8 +25,8 @@ ::= { } ::= { } ::= { } - ::= LOr - ::= LAnd + ::= LOr | VBar + ::= LAnd | Aro ::= Eq | NEq | LessThan | GreaterThan ::= Range ::= Add | Sub diff --git a/sample-initial.amo b/sample-initial.amo index e266248..95d3d7b 100644 --- a/sample-initial.amo +++ b/sample-initial.amo @@ -1,7 +1,8 @@ -type MyType - = Variant1 - | Variant2 int[usize] - | Variant3 str int[0..21] +MyType: Type +MyType = type + Variant1 + | Variant2 (int 0..usize) + | Variant3 str (int 0..21) myFunc: str, int -> MyType myFunc strval intval = @@ -9,8 +10,9 @@ myFunc strval intval = then Variant3 strval intval else Variant1 -type ComplexType - = ComplexVariant (str, int -> MyType) str +ComplexType: Type +ComplexType = type + ComplexVariant (str, int -> MyType) str myVal : ComplexType myVal = ComplexVariant myFunc "uwu~" diff --git a/src/parser/declaration.rs b/src/parser/declaration.rs index 421ca1f..185dbee 100644 --- a/src/parser/declaration.rs +++ b/src/parser/declaration.rs @@ -2,73 +2,36 @@ use std::mem::{Discriminant, discriminant}; use crate::token::Token; -use super::{Parsable, WrappedLexer, ParseError, absorb_token_or_error, types::{TightType, FullType}, expr::Expr}; +use super::{Parsable, WrappedLexer, ParseError, absorb_token_or_error, expr::Expr}; #[derive(Debug)] -pub enum Declaration { - Type { - name: String, - first_variants: Variant, - other_variants: Vec, - }, - /*Struct { - name: String, - typeargs: Vec, - first_fields: Field, - other_fields: Vec, - }, - Trait { - name: String, - typeargs: Vec, - on: Option, - }, - Impl(ImplDecl),*/ - Symbol { - name: String, - type_: FullType, - name2: String, - args: Vec, - value: Expr, - }, - Empty, +pub struct Declaration { + name: String, + type_: Expr, + name2: String, + args: Vec, + value: Expr, } impl Parsable for Declaration { fn expected() -> (Vec>, bool) { (vec![ discriminant(&Token::Symbol("".to_string())), - discriminant(&Token::Type), ], false) } fn parse(l: &mut WrappedLexer) -> Result { let span = l.span(); match l.monch() { - // Type Symbol Assign { VBar } - Token::Type => { - let name = String::parse(l)?; - absorb_token_or_error(l, discriminant(&Token::Assign))?; - let first_variants = Variant::parse(l)?; - let mut other_variants = Vec::new(); - while *l.curtok() == Token::VBar { - l.monch(); - other_variants.push(Variant::parse(l)?); - } - Ok(Self::Type { - first_variants, - other_variants, - name, - }) - }, // Symbol Colon DeclarationStart Symbol { Symbol } Assign Token::Symbol(name) => { absorb_token_or_error(l, discriminant(&Token::Colon))?; - let type_ = FullType::parse(l)?; + let type_ = Expr::parse(l)?; absorb_token_or_error(l, discriminant(&Token::DeclarationStart))?; let name2 = String::parse(l)?; let args = Vec::parse(l)?; absorb_token_or_error(l, discriminant(&Token::Assign))?; let value = Expr::parse(l)?; - Ok(Self::Symbol { + Ok(Self { name, name2, args, @@ -76,9 +39,6 @@ impl Parsable for Declaration { value, }) }, - Token::EOF | Token::DeclarationStart => { - Ok(Self::Empty) - }, _ => { Err(ParseError { expected: Self::expected().0, @@ -88,25 +48,3 @@ impl Parsable for Declaration { } } } - -#[derive(Debug)] -pub struct Variant { - name: String, - elements: Vec, -} - -impl Parsable for Variant { - fn expected() -> (Vec>, bool) { - ( - vec![discriminant(&Token::Symbol(String::new()))], - false, - ) - } - fn parse(l: &mut WrappedLexer) -> Result { - // ::= Symbol { } - Ok(Variant { - name: String::parse(l)?, - elements: Vec::parse(l)?, - }) - } -} diff --git a/src/parser/expr.rs b/src/parser/expr.rs index bf5c49a..e0fc275 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -1,7 +1,7 @@ -// ::= { } -// | If +// ::= If // | { } In // | +// | TypeOp // // ::= Literal // | OpenParen CloseParen @@ -24,6 +24,7 @@ pub enum Expr { If(Box, Box), Let(Box, Vec, Box), Infix(Rank1Exp), + Type(Box), } impl Parsable for Expr { @@ -32,6 +33,7 @@ impl Parsable for Expr { expected.extend_from_slice(&[ discriminant(&Token::Let), discriminant(&Token::If), + discriminant(&Token::TypeOp), ]); (expected, false) } @@ -49,6 +51,11 @@ impl Parsable for Expr { let in_expr = Expr::parse(l)?; Ok(Expr::Let(Box::new(initial_let), subsequent_lets, Box::new(in_expr))) } + Token::TypeOp => { + l.monch(); + let value = Expr::parse(l)?; + Ok(Expr::Type(Box::new(value))) + } Token::If => { l.monch(); let cond = Expr::parse(l)?; @@ -172,7 +179,7 @@ impl Parsable for Case { fn parse(l: &mut WrappedLexer) -> Result { let pattern = BindingPattern::parse(l)?; - absorb_token_or_error(l, discriminant(&Token::Aro))?; + absorb_token_or_error(l, discriminant(&Token::DubAro))?; let expr = Expr::parse(l)?; Ok(Case(pattern, expr)) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ca51457..2de893e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -19,7 +19,6 @@ use std::mem::discriminant; pub mod program; pub mod declaration; -pub mod types; pub mod expr; pub mod pattern; pub mod infix; diff --git a/src/parser/types.rs b/src/parser/types.rs deleted file mode 100644 index dc35c3a..0000000 --- a/src/parser/types.rs +++ /dev/null @@ -1,85 +0,0 @@ -// ::= [ Comma [ ] [ Aro ] ] -// ::= [ Comma [ ] ] -// ::= OpenParen CloseParen -// | Symbol [ OpenSquareBracket CloseSquareBracket ] -// ::= [ Comma [ ] ] - -use std::mem::{Discriminant, discriminant}; - -use crate::token::Token; - -use super::{Parsable, WrappedLexer, parse_delineated_vec, absorb_token_or_error, ParseError, expr::Expr}; - -#[derive(Debug)] -pub enum TightType { - Grouped(Box), - Simple { - name: String, - domains: Vec, - }, -} - -impl Parsable for TightType { - fn expected() -> (Vec>, bool) { - ( - vec![ - discriminant(&Token::OpenParen), - discriminant(&Token::Symbol(String::new())), - ], - false, - ) - } - fn parse(l: &mut WrappedLexer) -> Result { - let span = l.span(); - match l.monch() { - Token::Symbol(name) => { - let domains: Vec = if *l.curtok() == Token::OpenSquareBracket { - l.monch(); - let d = parse_delineated_vec(l, discriminant(&Token::Comma))?; - absorb_token_or_error(l, discriminant(&Token::CloseSquareBracket))?; - d - } else { - Vec::new() - }; - Ok(TightType::Simple { name, domains }) - } - Token::OpenParen => { - let full_type = Box::new(FullType::parse(l)?); - absorb_token_or_error(l, discriminant(&Token::CloseParen))?; - Ok(TightType::Grouped(full_type)) - } - _ => Err(ParseError { - location: span, - expected: Self::expected().0, - }) - } - } -} - -#[derive(Debug)] -pub enum FullType { - Simple(TightType), - Function(Vec, Box), -} - -impl Parsable for FullType { - fn expected() -> (Vec>, bool) { - TightType::expected() - } - // ::= [ Comma [ ] [ Aro ] ] - fn parse(l: &mut WrappedLexer) -> Result { - let first_type = TightType::parse(l)?; - if let Token::Comma = l.curtok() { - l.monch(); - let other_args = parse_delineated_vec(l, discriminant(&Token::Comma))?; - absorb_token_or_error(l, discriminant(&Token::Aro))?; - let result = Box::new(FullType::parse(l)?); - let mut args = Vec::with_capacity(1 + other_args.len()); - args.push(first_type); - args.extend(other_args); - Ok(FullType::Function(args, result)) - } else { - Ok(FullType::Simple(first_type)) - } - } -} diff --git a/src/token.rs b/src/token.rs index bf7f370..5df396d 100644 --- a/src/token.rs +++ b/src/token.rs @@ -103,11 +103,11 @@ pub enum Token { #[token("in")] In, //d12 - /// An `->` arrow + /// An `=>` arrow /// /// Used as part of function type annotations as well as in the cases of If-Is blocks - #[token("->")] - Aro, //d13 + #[token("=>")] + DubAro, //d13 /// An `=` assignment operator /// @@ -115,11 +115,9 @@ pub enum Token { #[token("=")] Assign, //d14 - /// The `|` keyword (or punctuation? idk what it's called) - /// - /// Used in deliniating variants of a type - #[token("|")] - VBar, //d15 + /// Type Operator + #[token("type", priority = 9)] + TypeOp, //d15 /// The `_` symbol /// @@ -137,12 +135,15 @@ pub enum Token { /// /// i.e. Logical Or #[token("||", |_| InfixRank1::LOr)] + #[token("|", |_| InfixRank1::VBar)] R1Infix(InfixRank1), //d18 /// A rank 2 infix binop (binary operator) /// /// i.e. Logical And #[token("&&", |_| InfixRank2::LAnd)] + #[token("->", |_| InfixRank2::Aro)] + #[token(",", |_| InfixRank2::Aro)] R2Infix(InfixRank2), //d19 /// A rank 3 infix binop (binary operator) @@ -225,7 +226,7 @@ pub enum Token { /// A `,` comma /// /// The age-old and timeless delineator - #[token(",")] + #[token(";")] Comma, //d31 /// A newline NOT followed by whitespace @@ -245,7 +246,7 @@ pub enum Token { /// /// simply lexes to `Symbol(variable)`, `Assign`, `Symbol(value)`. This makes it easy /// to identify declarations. - #[regex(r"\s*\n")] + #[regex(r"(\s*\n)+")] DeclarationStart, //d32 /// Denotes that the parser has reached the end of the input @@ -261,6 +262,11 @@ pub enum InfixRank1 { /// /// Takes two boolean values and returns true if either is true LOr, + + /// The VBar operator + /// + /// Takes two variant sets and returns sum + VBar, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -270,6 +276,9 @@ pub enum InfixRank2 { /// Takes two boolean values and returns true iff both values are true. Otherwise, /// returns false. LAnd, + + /// The Aro operator + Aro, } #[derive(Clone, Copy, Debug, PartialEq, Eq)]