diff --git a/kabel-rs/src/lexer.rs b/kabel-rs/src/lexer.rs
index 87eb9a9..04762a5 100644
--- a/kabel-rs/src/lexer.rs
+++ b/kabel-rs/src/lexer.rs
@@ -1,45 +1,107 @@
 use crate::lexutil;
-use lexutil::{Token, StatementToken};
+use lexutil::{Bracket, Literal, Statement, Token};
 use std::error::Error;
 
 pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> {
     println!("Lexing!");
 
-    let mut lexed: Vec<Token> = Vec::new();
-    let mut current_token = String::new();
+    let mut state = lexutil::LexerMachine {
+        current_token: String::new(),
+        lexed: Vec::new(),
+        quoting: false,
+        commenting: false,
+        numbering: false,
+    };
 
     for (i, c) in text_source.chars().enumerate() {
-        current_token.push(c);
-
+        dbg!("{} into {}", &c, &state);
+
+        if state.commenting && c == '\n' {
+            state.commenting = false;
+        }
+
+        // TODO: descriptive error
+        if c == '\"' && state.current_token.clone().pop().ok_or(panic!()) != Ok('\\') {
+            if state.quoting {
+                let mut tok_cpy = state.current_token.clone();
+                tok_cpy.pop();
+                state.lexed.push(Token::Literal(Literal::Str(tok_cpy)));
+                state.current_token = String::new();
+                state.quoting = false;
+            } else {
+                state.current_token = String::new();
+                state.quoting = true;
+            }
+            continue;
+        }
+
+        if state.commenting {
+            continue;
+        }
+
+        if state.quoting {
+            state.current_token.push(c);
+        }
+
+        if c.is_ascii_digit() {
+            if !state.numbering {
+                state.numbering = true;
+            }
+        } else if state.numbering && !c.is_ascii_digit() {
+            state.lexed.push(Token::Literal(Literal::Num(state.current_token.parse::<f64>().unwrap())));
+            state.current_token = String::new();
+            state.numbering = false;
+        }
+
+        state.current_token.push(c);
+
         // Known meaningful tokens
-        match current_token.as_str() {
+        match state.current_token.as_str() {
+            "\n" => {
+                continue;
+            }
+            "#" => {
+                state.commenting = true;
+                continue;
+            }
             ";" => {
-                lexed.push(Token::Statement(StatementToken::Terminator));
-                current_token = String::new();
+                state.lexed.push(Token::Statement(Statement::Terminator));
+                state.current_token = String::new();
             }
             "if " => {
-                lexed.push(Token::Statement(StatementToken::Conditional));
-                current_token = String::new();
+                state.lexed.push(Token::Statement(Statement::Conditional));
+                state.current_token = String::new();
                 continue;
             }
             "to " => {
-                lexed.push(Token::Statement(StatementToken::FunctionDef));
-                current_token = String::new();
+                state.lexed.push(Token::Statement(Statement::FunctionDef));
+                state.current_token = String::new();
                 continue;
             }
             "for " => {
-                lexed.push(Token::Statement(StatementToken::ForLoop));
-                current_token = String::new();
+                state.lexed.push(Token::Statement(Statement::ForLoop));
+                state.current_token = String::new();
                 continue;
             }
             "while " => {
-                lexed.push(Token::Statement(StatementToken::WhileLoop));
-                current_token = String::new();
+                state.lexed.push(Token::Statement(Statement::WhileLoop));
+                state.current_token = String::new();
                 continue;
             }
+            "(" => {
+                state.lexed.push(Token::Bracket(Bracket::Open));
+                state.current_token = String::new();
+                continue;
+            }
+            ")" => {
+                state.lexed.push(Token::Bracket(Bracket::Close));
+                state.current_token = String::new();
+                continue;
+            }
+
             &_ => {}
         }
     }
 
-    Ok(lexed)
+    Ok(state.lexed)
 }
diff --git a/kabel-rs/src/lexutil.rs b/kabel-rs/src/lexutil.rs
index 7b0e0ab..7626872 100644
--- a/kabel-rs/src/lexutil.rs
+++ b/kabel-rs/src/lexutil.rs
@@ -1,9 +1,9 @@
 use crate::variables;
 
-
 // parts of Token
 
-pub enum ArithmeticOperatorToken {
+#[derive(Debug)]
+pub enum ArithmeticOperator {
     Add,
     Subtract,
     Multiply,
@@ -12,17 +12,20 @@ pub enum ArithmeticOperatorToken {
     Modulus,
 }
 
-pub enum BooleanOperatorToken {
+#[derive(Debug)]
+pub enum BooleanOperator {
     And,
     Or,
 }
 
-pub enum LiteralToken {
+#[derive(Debug)]
+pub enum Literal {
     Str(String),
     Num(f64),
 }
 
-pub enum StatementToken {
+#[derive(Debug)]
+pub enum Statement {
     Conditional,
     ForLoop,
     WhileLoop,
@@ -30,17 +33,26 @@ pub enum StatementToken {
     Terminator,
 }
 
-pub enum BracketToken {
+#[derive(Debug)]
+pub enum Bracket {
     Open,
     Close,
 }
 
+#[derive(Debug)]
 pub enum Token {
-    Literal(LiteralToken),
-    ArithmeticOperator(ArithmeticOperatorToken),
-    Statement(StatementToken),
-    Bracket(BracketToken),
-    Variable(variables::VariableToken),
+    Literal(Literal),
+    ArithmeticOperator(ArithmeticOperator),
+    Statement(Statement),
+    Bracket(Bracket),
+    Variable(variables::Variable),
 }
 
-
+#[derive(Debug)]
+pub struct LexerMachine {
+    pub current_token: String,
+    pub lexed: Vec<Token>,
+    pub quoting: bool,
+    pub commenting: bool,
+    pub numbering: bool,
+}
diff --git a/kabel-rs/src/main.rs b/kabel-rs/src/main.rs
index ac3a9e0..45b5969 100644
--- a/kabel-rs/src/main.rs
+++ b/kabel-rs/src/main.rs
@@ -1,8 +1,8 @@
+use lexutil::Token;
 use std::env;
 use std::error::Error;
 use std::fs::File;
 use std::io::{self, Read};
-use lexutil::Token;
 
 mod lexer;
 mod lexutil;
@@ -38,14 +38,13 @@ fn main() -> Result<(), Box<dyn Error>> {
     // Lex!
     let lexed: Vec<Token> = match lexer::lexer(&text_source) {
         Err(e) => {
-            eprintln!(
-                "Lexer fail with {}",
-                e
-            );
+            eprintln!("Lexer fail with {}", e);
             std::process::exit(1);
         }
-        Ok(lexed) => lexed
+        Ok(lexed) => lexed,
     };
 
+    dbg!("Lexed length {}", lexed.len());
+
     Ok(())
 }
diff --git a/kabel-rs/src/modules.rs b/kabel-rs/src/modules.rs
index 20dfdc9..2eb14d6 100644
--- a/kabel-rs/src/modules.rs
+++ b/kabel-rs/src/modules.rs
@@ -1,5 +1,7 @@
-struct Thruster {}
+#[derive(Debug)]
+pub struct Thruster {}
 
-pub enum ModuleVar {
+#[derive(Debug)]
+pub enum Module {
     Thruster(Thruster),
 }
diff --git a/kabel-rs/src/try.kab b/kabel-rs/src/try.kab
new file mode 100644
index 0000000..761d1ef
--- /dev/null
+++ b/kabel-rs/src/try.kab
@@ -0,0 +1,3 @@
+"string lit";
+("string lit");# comment
+12345*(60+80);
diff --git a/kabel-rs/src/variables.rs b/kabel-rs/src/variables.rs
index 601b61a..22645a3 100644
--- a/kabel-rs/src/variables.rs
+++ b/kabel-rs/src/variables.rs
@@ -1,7 +1,8 @@
 use crate::modules;
 
-pub enum VariableToken {
-    Module(modules::ModuleVar),
+#[derive(Debug)]
+pub enum Variable {
+    Module(modules::Module),
     Num(f64),
     Str(String),
 }