//! A series of structures and methods for lexing an input //! //! Lexing is the process of taking a stream of characters and converting it into a series //! of tokens (or lexemes). Lexemes are designed to be the the smallest //! machine-understandable unit of information - for example, a keyword, operator, //! literal, or symbol. //! //! Look through the [`Token`] class to see a full list of the lexemes used by amo. //! //! The main entrypoint to this module is the derived [`Token::lexer`] method, which lexes //! a provided string. use logos::Logos; #[derive(Logos, Debug, PartialEq, Eq)] pub enum Token { /// A lexer error happened, woops! /// /// Also, this catches and ignores any whitespace that might be encountered #[error] #[regex(r"[ \t]+", logos::skip)] #[regex(r"\n[ \t]+", logos::skip)] #[regex(r"//.+", logos::skip)] #[regex(r"/\*([^*]*(\*[^/])?)+\*/", logos::skip)] Error, //d00 /// The `type` keyword /// /// Used to denote that a new type (called an enum in some languages) is being /// declared #[token("type")] Type, //d01 /// The `struct` keyword /// /// Used to denote the declaration of a kind of struct #[token("struct")] Struct, //d02 /// The `trait` keywor, Eqd /// /// Denotes the declaration of a new trait #[token("trait")] Trait, //d03 /// The `needs` keyword /// /// Used as part of a trait declaration to denote methods that will be required for a /// trait #[token("needs")] Needs, //d04 /// The `if` keyword /// /// Used to begin an If-Then-Else statement or an If-Is statement #[token("if")] If, //d05 /// The `is` keyword /// /// Used as part of an If-Is statement to indictate the start of the case listings #[token("is")] Is, //d06 /// The `then` keyword /// /// Indicates the start of the code block for the positive section of an If-Then-Else /// statement #[token("then")] Then, //d07 /// The `else` keyword /// /// Denotes the end of the positive section of an If-Then-Else block, and the begining /// of the negative section #[token("else")] Else, //d08 /// the `impl` keyword /// /// Used to denote the start of a trait implementation #[token("impl")] Impl, //d09 /// the `on` keyword /// /// Used in trait implementationsto seperate the trait being implemented and the type /// it's being implemented on. #[token("on")] On, //d10 /// the `let` keyword /// /// Allows binding a value to an immutable variable that can be used multiple times #[token("let")] Let, //d11 /// the `in` keyword /// /// Used to seperate a series of `let` bindings from the expression they're being used /// in. #[token("in")] In, //d12 /// An `=>` arrow /// /// Used as part of function type annotations as well as in the cases of If-Is blocks #[token("=>")] DubAro, //d13 /// An `=` assignment operator /// /// Used to seperate the left & right hand signs of an assignment operation #[token("=")] Assign, //d14 /// Type Operator #[token("type", priority = 9)] TypeOp, //d15 /// The `_` symbol /// /// Generally used as a placeholder or standin for another type #[token("_")] Placeholder, //d16 /// The `:` symbol /// /// Used as a seperator in various parts of the language #[token(":")] Colon, //d17 /// A rank 1 (applied last) infix binop (binary operator) /// /// i.e. Logical Or #[token("||", |_| InfixRank1::LOr)] #[token("|", |_| InfixRank1::VBar)] R1Infix(InfixRank1), //d18 /// A rank 2 infix binop (binary operator) /// /// i.e. Logical And #[token("&&", |_| InfixRank2::LAnd)] #[token("->", |_| InfixRank2::Aro)] #[token(",", |_| InfixRank2::Aro)] R2Infix(InfixRank2), //d19 /// A rank 3 infix binop (binary operator) /// /// i.e. Comparison operators like == and < #[token("==", |_| InfixRank3::Eq)] #[token("!=", |_| InfixRank3::NEq)] #[token("<", |_| InfixRank3::LessThan)] #[token(">", |_| InfixRank3::GreaterThan)] R3Infix(InfixRank3), //d20 /// A rank 5 infix binop (binary operator) /// /// i.e. Range #[token("..", |_| InfixRank4::Range)] R4Infix(InfixRank4), //d21 /// A rank 6 infix binop (binary operator) /// /// i.e. Addition & Subtraction #[token("+", |_| InfixRank5::Add)] #[token("-", |_| InfixRank5::Sub)] R5Infix(InfixRank5), //d22 /// A rank 7 (applied first) infix binop (binary operator) /// /// i.e. Multiplication, Division, and Modulo #[token("*", |_| InfixRank6::Mul)] #[token("/", |_| InfixRank6::Div)] #[token("%", |_| InfixRank6::Mod)] R6Infix(InfixRank6), //d23 /// Some literal (a constant value represented textually) /// /// For example, 100 is an integer literal, "hewwo" is a string literal, and `true` is /// a boolean literal. #[regex("\"(?:[^\"]*(?:\\\\\")?)+\"", |lex| Literal::from_string_match(lex.slice()))] #[regex(r"\d+", |lex| Literal::from_int_match(lex.slice()))] Literal(Literal), //d24 /// Some symbol, usually a variable or a type #[regex(r"[a-zA-Z_][a-zA-Z\d_]*", |lex| lex.slice().to_string(), priority = 0)] Symbol(String), //d25 /// An opening `[` square bracket /// /// Usually used in arrays and domain restrictions #[token("[")] OpenSquareBracket, //d26 /// A closing `]` square bracket /// /// Usually used in arrays and domain restrictions, and the counterpart to the opening /// square bracket. /// /// In amo, the opening and closing square brackets are both lesbians, and they're /// dating. The closing square bracket is transgender, also. #[token("]")] CloseSquareBracket, //d27 /// An opening `(` paren /// /// Usually used to make explicit the order of operations #[token("(")] OpenParen, //d28 /// A closing `)` paren /// /// Usually used in arrays and domain restrictions, this is the counterpart to the /// open parenthesis. #[token(")")] CloseParen, //d29 /// A `.` period /// /// For getting fields of structs #[token(".")] Dot, //d30 /// A `,` comma /// /// The age-old and timeless delineator #[token(";")] Comma, //d31 /// A newline NOT followed by whitespace /// /// This means that the following tokens are at the start of a line. For example /// /// ``` /// variable = value /// ``` /// /// lexes to `DeclarationStart`, `Symbol(variable)`, `Assign`, `Symbol(value)`, /// whereas /// /// ``` /// variable = value /// ``` /// /// simply lexes to `Symbol(variable)`, `Assign`, `Symbol(value)`. This makes it easy /// to identify declarations. #[regex(r"(\s*\n)+")] DeclarationStart, //d32 /// Denotes that the parser has reached the end of the input /// /// This is always the last token in a stream, both in that it must be present in all /// streams, and in that it will never be followed by any tokens. EOF, //d33 } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum InfixRank1 { /// The logical OR operator /// /// Takes two boolean values and returns true if either is true LOr, /// The VBar operator /// /// Takes two variant sets and returns sum VBar, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum InfixRank2 { /// The logical AND operator /// /// Takes two boolean values and returns true iff both values are true. Otherwise, /// returns false. LAnd, /// The Aro operator Aro, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum InfixRank3 { /// The equality operator. /// /// Takes two values and returns true iff they are equal Eq, /// The inequality operator. /// /// Takes two values and returns true iff they are NOT equal NEq, /// The less-than operator. /// /// Takes two numeric values and returns true iff the first is LESS than the second LessThan, /// The greater-than operator. /// /// Takes two numeric values and returns true iff the first is GREATER than the second GreaterThan, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum InfixRank4 { /// The additive operator. /// /// Takes two numeric values and returns a range from the first to the second Range, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum InfixRank5 { /// The additive operator. /// /// Takes two numeric values and returns their sum Add, /// The subtractive operator. /// /// Takes two numeric values and returns their difference Sub, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum InfixRank6 { /// The multiplicitive operator. /// /// Takes two numeric values and returns their product Mul, /// The modulo operator. /// /// Takes two numeric values and returns the remainder of their division Mod, /// The division operator. /// /// Takes two numeric values and returns their quotient Div, } #[derive(Clone, Debug, PartialEq, Eq)] /// A specific type of literal, used for the [`Token::Literal`] token pub enum Literal { /// A string literal /// /// The internal [`String`] is the content of the string, with escape characters /// already processed. String(String), /// An integer literal Int(u64) } impl Literal { /// Create a string literal by parsing a matched regex /// /// The input should be in the form of `""` WITH THE QUOTES ("). The quotes /// will then be trimmed, and character escape sequences will be substituted. pub fn from_string_match(s: &str) -> Self { if s.len() < 2 { // This should be unreachable, but this is kept just in case. eprintln!("[WARN] ---[Ruh roh!]-------------------------------------"); eprintln!("[WARN] Unreachable executed in token::Literal::from_string_match!"); eprintln!("[WARN] This sugguests that the regex or callback for the string literal token is incorrect."); eprintln!("[WARN] Attempting to proceed anyway, but this indicates a serious problem with the lexer."); eprintln!("[WARN] --------------------------------------------------"); Self::String(String::new()) } else { Self::String(s[1..s.len()-1].to_string()) } } /// Create an integer literal by parsing a regex match /// /// The input should be in the form of a series of ASCII digits 0-9 of any length. /// Any parse errors will result in [`None`] being returned instead. These indicate a /// problem with the user's code, and should be reported. pub fn from_int_match(s: &str) -> Option { s.parse().ok().map(Self::Int) } }