From d13c9befb2c231ebaea750a45d783e55ea49c775 Mon Sep 17 00:00:00 2001 From: TerraMaster85 Date: Tue, 16 Jan 2024 09:21:51 -0500 Subject: [PATCH] More lexer work (almost working) --- .gitignore | 1 + kabel-rs/src/errors.rs | 1 + kabel-rs/src/lexer.rs | 72 ++++++++++++++++++++++++++++++++++------- kabel-rs/src/lexutil.rs | 7 ++-- 4 files changed, 66 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index c0d979a..a1def16 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ kabel-rs/target +*/target diff --git a/kabel-rs/src/errors.rs b/kabel-rs/src/errors.rs index e69de29..8b13789 100644 --- a/kabel-rs/src/errors.rs +++ b/kabel-rs/src/errors.rs @@ -0,0 +1 @@ + diff --git a/kabel-rs/src/lexer.rs b/kabel-rs/src/lexer.rs index 04762a5..c21a2f9 100644 --- a/kabel-rs/src/lexer.rs +++ b/kabel-rs/src/lexer.rs @@ -1,5 +1,5 @@ use crate::lexutil; -use lexutil::{Bracket, Literal, Statement, Token}; +use lexutil::{Bracket, Literal, Statement, ArithOperator, Token}; use std::error::Error; pub fn lexer(text_source: &str) -> Result, Box> { @@ -11,20 +11,30 @@ pub fn lexer(text_source: &str) -> Result, Box> { quoting: false, commenting: false, numbering: false, + escape_next: false, }; for (i, c) in text_source.chars().enumerate() { dbg!("{} into {}", &c, &state); + // Commenting end if state.commenting && c == '\n' { state.commenting = false; + continue; } - // TODO: descriptive error - if c == '\"' && state.current_token.clone().pop().ok_or(panic!()) != Ok('\\') { + // Commenting continue + if state.commenting { + if c == '\n' { + state.commenting = false; + } + continue; + } + + // Stringing begin/end + if c == '\"' && !state.escape_next { if state.quoting { - let mut tok_cpy = state.current_token.clone(); - tok_cpy.pop(); + let tok_cpy = state.current_token.clone(); state.lexed.push(Token::Literal(Literal::Str(tok_cpy))); state.current_token = String::new(); state.quoting = false; @@ -33,14 +43,19 @@ pub fn lexer(text_source: &str) -> Result, Box> { state.quoting = true; } continue; - } - - if state.commenting { + } else if state.escape_next { + state.current_token.push(c); + state.escape_next = false; continue; } + // Stringing continue if state.quoting { + if c == '\\' { + state.escape_next = true; + } state.current_token.push(c); + continue; } if c.is_ascii_digit() { @@ -48,13 +63,13 @@ pub fn lexer(text_source: &str) -> Result, Box> { state.numbering = true; } } else if state.numbering && !c.is_ascii_digit() { - state.lexed.push(Token::Literal(Literal::Num(state.current_token.parse::().unwrap()))); + state.lexed.push(Token::Literal(Literal::Num( + state.current_token.parse::().unwrap(), + ))); state.current_token = String::new(); state.numbering = false; } - state.current_token.push(c); - // Known meaningful tokens match state.current_token.as_str() { "\n" => { @@ -62,6 +77,7 @@ pub fn lexer(text_source: &str) -> Result, Box> { } "#" => { state.commenting = true; + state.current_token = String::new(); continue; } ";" => { @@ -98,9 +114,41 @@ pub fn lexer(text_source: &str) -> Result, Box> { state.current_token = String::new(); continue; } + "*" => { + state.lexed.push(Token::ArithOperator(ArithOperator::Multiply)); + state.current_token = String::new(); + continue; + } + "/" => { + state.lexed.push(Token::ArithOperator(ArithOperator::Divide)); + state.current_token = String::new(); + continue; + } + "+" => { + state.lexed.push(Token::ArithOperator(ArithOperator::Add)); + state.current_token = String::new(); + continue; + } + "-" => { + state.lexed.push(Token::ArithOperator(ArithOperator::Subtract)); + state.current_token = String::new(); + continue; + } + "^" => { + state.lexed.push(Token::ArithOperator(ArithOperator::Exponentiate)); + state.current_token = String::new(); + continue; + } + "%" => { + state.lexed.push(Token::ArithOperator(ArithOperator::Reduce)); + state.current_token = String::new(); + continue; + } - &_ => {} + + &_ => { state.current_token.push(c); } } + } Ok(state.lexed) diff --git a/kabel-rs/src/lexutil.rs b/kabel-rs/src/lexutil.rs index 7626872..ab576c3 100644 --- a/kabel-rs/src/lexutil.rs +++ b/kabel-rs/src/lexutil.rs @@ -3,13 +3,13 @@ use crate::variables; // parts of Token #[derive(Debug)] -pub enum ArithmeticOperator { +pub enum ArithOperator { Add, Subtract, Multiply, Divide, Exponentiate, - Modulus, + Reduce, } #[derive(Debug)] @@ -42,7 +42,7 @@ pub enum Bracket { #[derive(Debug)] pub enum Token { Literal(Literal), - ArithmeticOperator(ArithmeticOperator), + ArithOperator(ArithOperator), Statement(Statement), Bracket(Bracket), Variable(variables::Variable), @@ -55,4 +55,5 @@ pub struct LexerMachine { pub quoting: bool, pub commenting: bool, pub numbering: bool, + pub escape_next: bool, }