lexer refactor (and first working version!)

This commit is contained in:
TerraMaster85 2024-01-16 14:53:09 -05:00
parent 82e43a271e
commit 821073e88a
3 changed files with 141 additions and 155 deletions

View File

@ -1,156 +1,152 @@
use crate::lexutil;
use lexutil::{ArithOperator, Bracket, Literal, Statement, Token};
use std::error::Error;
#[derive(Debug)]
enum State {
Stringing,
Commenting,
Numbering,
BuildingToken,
}
pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> { pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> {
println!("Lexing!"); println!("Lexing!");
let mut state = lexutil::LexerMachine { let mut current_token = String::new();
current_token: String::new(), let mut lexed = Vec::new();
lexed: Vec::new(), let mut state: State = State::BuildingToken;
quoting: false,
commenting: false,
numbering: false,
escape_next: false,
};
for (i, c) in text_source.chars().enumerate() { let mut chars = text_source.chars().peekable();
dbg!("Begin", &c, &state);
// Commenting end while let Some(c) = chars.next() {
if state.commenting && c == '\n' { match state {
state.commenting = false; State::Commenting => {
continue; // Stop commenting at end of line
}
// Commenting continue
if state.commenting {
if c == '\n' { if c == '\n' {
state.commenting = false; state = State::BuildingToken;
} }
continue;
} }
State::Stringing => {
// Stringing begin/end // If next char is an unescaped quote
if c == '\"' && !state.escape_next { if let Some(c_peek) = chars.peek() {
if state.quoting { if c != '\\' && *c_peek == '\"' {
let tok_cpy = state.current_token.clone(); dbg!("hi");
state.lexed.push(Token::Literal(Literal::Str(tok_cpy))); chars.next();
state.current_token = String::new(); current_token.push(c);
state.quoting = false; let tok_cpy = current_token.clone();
lexed.push(Token::Literal(Literal::Str(tok_cpy)));
state = State::BuildingToken;
current_token = String::new();
} else { } else {
state.current_token = String::new(); current_token.push(c);
state.quoting = true;
} }
continue; } else {
} else if state.escape_next { dbg!("h");
state.current_token.push(c); continue; // we're at the end. we should bring a user error
state.escape_next = false; // because this string was not properly delimited
continue;
} }
// Stringing continue
if state.quoting {
if c == '\\' {
state.escape_next = true;
} }
state.current_token.push(c); State::Numbering => {
continue; // If next char isn't numeric, is at end of this number literal
} if let Some(c_peek) = chars.peek() {
current_token.push(c);
if c.is_ascii_digit() { if !c_peek.is_ascii_digit() {
if !state.numbering { lexed.push(Token::Literal(Literal::Num(
state.numbering = true; // if this unwrap fails, we've failed
} // to confirm that this is a number literal
} else if state.numbering && !c.is_ascii_digit() { current_token.parse::<f64>().unwrap(),
state.lexed.push(Token::Literal(Literal::Num(
state.current_token.parse::<f64>().unwrap(),
))); )));
state.current_token = String::new(); state = State::BuildingToken;
state.numbering = false; current_token = String::new();
}
} else {
continue; // we're at the end. not a problem because
// numbers self-terminate
}
}
State::BuildingToken => {
if c == '\"' {
state = State::Stringing;
current_token = String::new();
// We don't need to push c because it's the quote delimiter,
// which has already served its purpose as an indicator
continue;
} else if c.is_ascii_digit() {
state = State::Numbering;
current_token = c.to_string();
continue;
} }
// Known meaningful tokens // Known meaningful tokens
match state.current_token.as_str() { current_token.push(c);
match current_token.as_str() {
"\n" => { "\n" => {
continue; continue;
} }
"#" => { "#" => {
state.commenting = true; state = State::Commenting;
state.current_token = String::new(); current_token = String::new();
continue;
} }
";" => { ";" => {
state.lexed.push(Token::Statement(Statement::Terminator)); lexed.push(Token::Statement(Statement::Terminator));
state.current_token = String::new(); current_token = String::new();
} }
"if " => { "if " => {
state.lexed.push(Token::Statement(Statement::Conditional)); lexed.push(Token::Statement(Statement::Conditional));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"to " => { "to " => {
state.lexed.push(Token::Statement(Statement::FunctionDef)); lexed.push(Token::Statement(Statement::FunctionDef));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"for " => { "for " => {
state.lexed.push(Token::Statement(Statement::ForLoop)); lexed.push(Token::Statement(Statement::ForLoop));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"while " => { "while " => {
state.lexed.push(Token::Statement(Statement::WhileLoop)); lexed.push(Token::Statement(Statement::WhileLoop));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"(" => {t w "(" => {
state.lexed.push(Token::Bracket(Bracket::Open)); lexed.push(Token::Bracket(Bracket::Open));
state.current_token = String::new(); current_token = String::new();
continue;
} }
")" => { ")" => {
state.lexed.push(Token::Bracket(Bracket::Close)); lexed.push(Token::Bracket(Bracket::Close));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"*" => { "*" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Multiply)); lexed.push(Token::ArithOperator(ArithOperator::Multiply));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"/" => { "/" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Divide)); lexed.push(Token::ArithOperator(ArithOperator::Divide));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"+" => { "+" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Add)); lexed.push(Token::ArithOperator(ArithOperator::Add));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"-" => { "-" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Subtract)); lexed.push(Token::ArithOperator(ArithOperator::Subtract));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"^" => { "^" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Exponentiate)); lexed.push(Token::ArithOperator(ArithOperator::Exponentiate));
state.current_token = String::new(); current_token = String::new();
continue;
} }
"%" => { "%" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Reduce)); lexed.push(Token::ArithOperator(ArithOperator::Reduce));
state.current_token = String::new(); current_token = String::new();
continue;
} }
&_ => {} &_ => {}
} }
state.current_token.push(c); }
dbg!("End", &c, &state); }
dbg!(&c, &state, &current_token, &lexed);
} }
Ok(state.lexed) Ok(lexed)
} }

View File

@ -47,13 +47,3 @@ pub enum Token {
Bracket(Bracket), Bracket(Bracket),
Variable(variables::Variable), Variable(variables::Variable),
} }
// Pre-refactor lexer state bundle: one struct of boolean mode flags plus the
// in-progress token and output, threaded mutably through `lexer`.
// NOTE(review): superseded by the `State` enum in the refactored lexer, which
// makes the mutually exclusive modes unrepresentable as combinations of flags.
#[derive(Debug)]
pub struct LexerMachine {
// Characters of the token currently being assembled.
pub current_token: String,
// Completed tokens, in source order.
pub lexed: Vec<Token>,
// True while inside a string literal.
pub quoting: bool,
// True while inside a `#` line comment.
pub commenting: bool,
// True while accumulating a number literal.
pub numbering: bool,
// True when the previous character was a backslash inside a string.
pub escape_next: bool,
}

View File

@ -44,7 +44,7 @@ fn main() -> Result<(), Box<dyn Error>> {
Ok(lexed) => lexed, Ok(lexed) => lexed,
}; };
dbg!(lexed.len()); dbg!(lexed.len(), lexed);
Ok(()) Ok(())
} }