From 821073e88a6ede10e54ef8167e2ecff69c0fdd74 Mon Sep 17 00:00:00 2001 From: TerraMaster85 Date: Tue, 16 Jan 2024 14:53:09 -0500 Subject: [PATCH] lexer refactor (and first working version!) --- kabel-rs/src/lexer.rs | 284 ++++++++++++++++++++-------------------- kabel-rs/src/lexutil.rs | 10 -- kabel-rs/src/main.rs | 2 +- 3 files changed, 141 insertions(+), 155 deletions(-) diff --git a/kabel-rs/src/lexer.rs b/kabel-rs/src/lexer.rs index 31710fa..4e2d281 100644 --- a/kabel-rs/src/lexer.rs +++ b/kabel-rs/src/lexer.rs @@ -1,156 +1,152 @@ use crate::lexutil; -use lexutil::{Bracket, Literal, Statement, ArithOperator, Token}; +use lexutil::{ArithOperator, Bracket, Literal, Statement, Token}; use std::error::Error; +#[derive(Debug)] +enum State { + Stringing, + Commenting, + Numbering, + BuildingToken, +} + pub fn lexer(text_source: &str) -> Result, Box> { println!("Lexing!"); - let mut state = lexutil::LexerMachine { - current_token: String::new(), - lexed: Vec::new(), - quoting: false, - commenting: false, - numbering: false, - escape_next: false, - }; + let mut current_token = String::new(); + let mut lexed = Vec::new(); + let mut state: State = State::BuildingToken; - for (i, c) in text_source.chars().enumerate() { - dbg!("Begin", &c, &state); + let mut chars = text_source.chars().peekable(); - // Commenting end - if state.commenting && c == '\n' { - state.commenting = false; - continue; + while let Some(c) = chars.next() { + match state { + State::Commenting => { + // Stop commenting at end of line + if c == '\n' { + state = State::BuildingToken; + } + } + State::Stringing => { + // If next char is an unescaped quote + if let Some(c_peek) = chars.peek() { + if c != '\\' && *c_peek == '\"' { + dbg!("hi"); + chars.next(); + current_token.push(c); + let tok_cpy = current_token.clone(); + lexed.push(Token::Literal(Literal::Str(tok_cpy))); + state = State::BuildingToken; + current_token = String::new(); + } else { + current_token.push(c); + } + } else { + dbg!("h"); + 
continue; // we're at the end. we should bring a user error + // because this string was not properly delimited + } + } + State::Numbering => { + // If next char isn't numeric, is at end of this number literal + if let Some(c_peek) = chars.peek() { + current_token.push(c); + if !c_peek.is_ascii_digit() { + lexed.push(Token::Literal(Literal::Num( + // if this unwrap fails, we've failed + // to confirm that this is a number literal + current_token.parse::().unwrap(), + ))); + state = State::BuildingToken; + current_token = String::new(); + } + } else { + continue; // we're at the end. not a problem because + // numbers self-terminate + } + } + State::BuildingToken => { + if c == '\"' { + state = State::Stringing; + current_token = String::new(); + // We don't need to push c because it's the quote delimiter, + // which has already served its purpose as an indicator + continue; + } else if c.is_ascii_digit() { + state = State::Numbering; + current_token = c.to_string(); + continue; + } + + // Known meaningful tokens + current_token.push(c); + match current_token.as_str() { + "\n" => { + continue; + } + "#" => { + state = State::Commenting; + current_token = String::new(); + } + ";" => { + lexed.push(Token::Statement(Statement::Terminator)); + current_token = String::new(); + } + "if " => { + lexed.push(Token::Statement(Statement::Conditional)); + current_token = String::new(); + } + "to " => { + lexed.push(Token::Statement(Statement::FunctionDef)); + current_token = String::new(); + } + "for " => { + lexed.push(Token::Statement(Statement::ForLoop)); + current_token = String::new(); + } + "while " => { + lexed.push(Token::Statement(Statement::WhileLoop)); + current_token = String::new(); + } + "(" => { + lexed.push(Token::Bracket(Bracket::Open)); + current_token = String::new(); + } + ")" => { + lexed.push(Token::Bracket(Bracket::Close)); + current_token = String::new(); + } + "*" => { + lexed.push(Token::ArithOperator(ArithOperator::Multiply)); + current_token = 
String::new(); + } + "/" => { + lexed.push(Token::ArithOperator(ArithOperator::Divide)); + current_token = String::new(); + } + "+" => { + lexed.push(Token::ArithOperator(ArithOperator::Add)); + current_token = String::new(); + } + "-" => { + lexed.push(Token::ArithOperator(ArithOperator::Subtract)); + current_token = String::new(); + } + "^" => { + lexed.push(Token::ArithOperator(ArithOperator::Exponentiate)); + current_token = String::new(); + } + "%" => { + lexed.push(Token::ArithOperator(ArithOperator::Reduce)); + current_token = String::new(); + } + + &_ => {} + } + } } - - // Commenting continue - if state.commenting { - if c == '\n' { - state.commenting = false; - } - continue; - } - - // Stringing begin/end - if c == '\"' && !state.escape_next { - if state.quoting { - let tok_cpy = state.current_token.clone(); - state.lexed.push(Token::Literal(Literal::Str(tok_cpy))); - state.current_token = String::new(); - state.quoting = false; - } else { - state.current_token = String::new(); - state.quoting = true; - } - continue; - } else if state.escape_next { - state.current_token.push(c); - state.escape_next = false; - continue; - } - - // Stringing continue - if state.quoting { - if c == '\\' { - state.escape_next = true; - } - state.current_token.push(c); - continue; - } - - if c.is_ascii_digit() { - if !state.numbering { - state.numbering = true; - } - } else if state.numbering && !c.is_ascii_digit() { - state.lexed.push(Token::Literal(Literal::Num( - state.current_token.parse::().unwrap(), - ))); - state.current_token = String::new(); - state.numbering = false; - } - - // Known meaningful tokens - match state.current_token.as_str() { - "\n" => { - continue; - } - "#" => { - state.commenting = true; - state.current_token = String::new(); - continue; - } - ";" => { - state.lexed.push(Token::Statement(Statement::Terminator)); - state.current_token = String::new(); - } - "if " => { - state.lexed.push(Token::Statement(Statement::Conditional)); - state.current_token 
= String::new(); - continue; - } - "to " => { - state.lexed.push(Token::Statement(Statement::FunctionDef)); - state.current_token = String::new(); - continue; - } - "for " => { - state.lexed.push(Token::Statement(Statement::ForLoop)); - state.current_token = String::new(); - continue; - } - "while " => { - state.lexed.push(Token::Statement(Statement::WhileLoop)); - state.current_token = String::new(); - continue; - } - "(" => {t w - state.lexed.push(Token::Bracket(Bracket::Open)); - state.current_token = String::new(); - continue; - } - ")" => { - state.lexed.push(Token::Bracket(Bracket::Close)); - state.current_token = String::new(); - continue; - } - "*" => { - state.lexed.push(Token::ArithOperator(ArithOperator::Multiply)); - state.current_token = String::new(); - continue; - } - "/" => { - state.lexed.push(Token::ArithOperator(ArithOperator::Divide)); - state.current_token = String::new(); - continue; - } - "+" => { - state.lexed.push(Token::ArithOperator(ArithOperator::Add)); - state.current_token = String::new(); - continue; - } - "-" => { - state.lexed.push(Token::ArithOperator(ArithOperator::Subtract)); - state.current_token = String::new(); - continue; - } - "^" => { - state.lexed.push(Token::ArithOperator(ArithOperator::Exponentiate)); - state.current_token = String::new(); - continue; - } - "%" => { - state.lexed.push(Token::ArithOperator(ArithOperator::Reduce)); - state.current_token = String::new(); - continue; - } - - - &_ => {} - } - state.current_token.push(c); - dbg!("End", &c, &state); + dbg!(&c, &state, &current_token, &lexed); } - Ok(state.lexed) + Ok(lexed) } diff --git a/kabel-rs/src/lexutil.rs b/kabel-rs/src/lexutil.rs index ab576c3..0ad2a63 100644 --- a/kabel-rs/src/lexutil.rs +++ b/kabel-rs/src/lexutil.rs @@ -47,13 +47,3 @@ pub enum Token { Bracket(Bracket), Variable(variables::Variable), } - -#[derive(Debug)] -pub struct LexerMachine { - pub current_token: String, - pub lexed: Vec, - pub quoting: bool, - pub commenting: bool, - pub numbering: 
bool, - pub escape_next: bool, -} diff --git a/kabel-rs/src/main.rs b/kabel-rs/src/main.rs index 9697b6b..ad3e1ec 100644 --- a/kabel-rs/src/main.rs +++ b/kabel-rs/src/main.rs @@ -44,7 +44,7 @@ fn main() -> Result<(), Box> { Ok(lexed) => lexed, }; - dbg!(lexed.len()); + dbg!(lexed.len(), lexed); Ok(()) }