lexer refactor (and first working version!)

This commit is contained in:
TerraMaster85 2024-01-16 14:53:09 -05:00
parent 82e43a271e
commit 821073e88a
3 changed files with 141 additions and 155 deletions

View file

@ -1,156 +1,152 @@
use crate::lexutil;
use lexutil::{Bracket, Literal, Statement, ArithOperator, Token};
use lexutil::{ArithOperator, Bracket, Literal, Statement, Token};
use std::error::Error;
/// The lexer's current mode, which decides how the next character is interpreted.
///
/// `Copy` and `PartialEq` are derived so the mode can be compared and passed
/// around freely; it carries no data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Inside a double-quoted string literal.
    Stringing,
    /// Inside a `#` comment, which runs until the next newline.
    Commenting,
    /// Accumulating the ASCII digits of a numeric literal.
    Numbering,
    /// Default mode: accumulating characters until they spell a known token.
    BuildingToken,
}
pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> {
println!("Lexing!");
let mut state = lexutil::LexerMachine {
current_token: String::new(),
lexed: Vec::new(),
quoting: false,
commenting: false,
numbering: false,
escape_next: false,
};
let mut current_token = String::new();
let mut lexed = Vec::new();
let mut state: State = State::BuildingToken;
for (i, c) in text_source.chars().enumerate() {
dbg!("Begin", &c, &state);
let mut chars = text_source.chars().peekable();
// Commenting end
if state.commenting && c == '\n' {
state.commenting = false;
continue;
while let Some(c) = chars.next() {
match state {
State::Commenting => {
// Stop commenting at end of line
if c == '\n' {
state = State::BuildingToken;
}
}
State::Stringing => {
// If next char is an unescaped quote
if let Some(c_peek) = chars.peek() {
if c != '\\' && *c_peek == '\"' {
dbg!("hi");
chars.next();
current_token.push(c);
let tok_cpy = current_token.clone();
lexed.push(Token::Literal(Literal::Str(tok_cpy)));
state = State::BuildingToken;
current_token = String::new();
} else {
current_token.push(c);
}
} else {
dbg!("h");
continue; // we're at the end. we should bring a user error
// because this string was not properly delimited
}
}
State::Numbering => {
// If next char isn't numeric, is at end of this number literal
if let Some(c_peek) = chars.peek() {
current_token.push(c);
if !c_peek.is_ascii_digit() {
lexed.push(Token::Literal(Literal::Num(
// if this unwrap fails, we've failed
// to confirm that this is a number literal
current_token.parse::<f64>().unwrap(),
)));
state = State::BuildingToken;
current_token = String::new();
}
} else {
continue; // we're at the end. not a problem because
// numbers self-terminate
}
}
State::BuildingToken => {
if c == '\"' {
state = State::Stringing;
current_token = String::new();
// We don't need to push c because it's the quote delimiter,
// which has already served its purpose as an indicator
continue;
} else if c.is_ascii_digit() {
state = State::Numbering;
current_token = c.to_string();
continue;
}
// Known meaningful tokens
current_token.push(c);
match current_token.as_str() {
"\n" => {
continue;
}
"#" => {
state = State::Commenting;
current_token = String::new();
}
";" => {
lexed.push(Token::Statement(Statement::Terminator));
current_token = String::new();
}
"if " => {
lexed.push(Token::Statement(Statement::Conditional));
current_token = String::new();
}
"to " => {
lexed.push(Token::Statement(Statement::FunctionDef));
current_token = String::new();
}
"for " => {
lexed.push(Token::Statement(Statement::ForLoop));
current_token = String::new();
}
"while " => {
lexed.push(Token::Statement(Statement::WhileLoop));
current_token = String::new();
}
"(" => {
lexed.push(Token::Bracket(Bracket::Open));
current_token = String::new();
}
")" => {
lexed.push(Token::Bracket(Bracket::Close));
current_token = String::new();
}
"*" => {
lexed.push(Token::ArithOperator(ArithOperator::Multiply));
current_token = String::new();
}
"/" => {
lexed.push(Token::ArithOperator(ArithOperator::Divide));
current_token = String::new();
}
"+" => {
lexed.push(Token::ArithOperator(ArithOperator::Add));
current_token = String::new();
}
"-" => {
lexed.push(Token::ArithOperator(ArithOperator::Subtract));
current_token = String::new();
}
"^" => {
lexed.push(Token::ArithOperator(ArithOperator::Exponentiate));
current_token = String::new();
}
"%" => {
lexed.push(Token::ArithOperator(ArithOperator::Reduce));
current_token = String::new();
}
&_ => {}
}
}
}
// Commenting continue
if state.commenting {
if c == '\n' {
state.commenting = false;
}
continue;
}
// Stringing begin/end
if c == '\"' && !state.escape_next {
if state.quoting {
let tok_cpy = state.current_token.clone();
state.lexed.push(Token::Literal(Literal::Str(tok_cpy)));
state.current_token = String::new();
state.quoting = false;
} else {
state.current_token = String::new();
state.quoting = true;
}
continue;
} else if state.escape_next {
state.current_token.push(c);
state.escape_next = false;
continue;
}
// Stringing continue
if state.quoting {
if c == '\\' {
state.escape_next = true;
}
state.current_token.push(c);
continue;
}
if c.is_ascii_digit() {
if !state.numbering {
state.numbering = true;
}
} else if state.numbering && !c.is_ascii_digit() {
state.lexed.push(Token::Literal(Literal::Num(
state.current_token.parse::<f64>().unwrap(),
)));
state.current_token = String::new();
state.numbering = false;
}
// Known meaningful tokens
match state.current_token.as_str() {
"\n" => {
continue;
}
"#" => {
state.commenting = true;
state.current_token = String::new();
continue;
}
";" => {
state.lexed.push(Token::Statement(Statement::Terminator));
state.current_token = String::new();
}
"if " => {
state.lexed.push(Token::Statement(Statement::Conditional));
state.current_token = String::new();
continue;
}
"to " => {
state.lexed.push(Token::Statement(Statement::FunctionDef));
state.current_token = String::new();
continue;
}
"for " => {
state.lexed.push(Token::Statement(Statement::ForLoop));
state.current_token = String::new();
continue;
}
"while " => {
state.lexed.push(Token::Statement(Statement::WhileLoop));
state.current_token = String::new();
continue;
}
"(" => {t w
state.lexed.push(Token::Bracket(Bracket::Open));
state.current_token = String::new();
continue;
}
")" => {
state.lexed.push(Token::Bracket(Bracket::Close));
state.current_token = String::new();
continue;
}
"*" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Multiply));
state.current_token = String::new();
continue;
}
"/" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Divide));
state.current_token = String::new();
continue;
}
"+" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Add));
state.current_token = String::new();
continue;
}
"-" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Subtract));
state.current_token = String::new();
continue;
}
"^" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Exponentiate));
state.current_token = String::new();
continue;
}
"%" => {
state.lexed.push(Token::ArithOperator(ArithOperator::Reduce));
state.current_token = String::new();
continue;
}
&_ => {}
}
state.current_token.push(c);
dbg!("End", &c, &state);
dbg!(&c, &state, &current_token, &lexed);
}
Ok(state.lexed)
Ok(lexed)
}

View file

@ -47,13 +47,3 @@ pub enum Token {
Bracket(Bracket),
Variable(variables::Variable),
}
/// Mutable bookkeeping for a flag-based lexer: the text gathered so far, the
/// tokens produced, and one boolean per lexing mode.
///
/// NOTE(review): the `lexer` function keeps its own `current_token`/`lexed`
/// locals and a `State` enum; confirm whether this struct is still used.
#[derive(Debug)]
pub struct LexerMachine {
/// Characters accumulated for the token currently being built.
pub current_token: String,
/// Tokens produced so far, in source order.
pub lexed: Vec<Token>,
/// True while inside a double-quoted string literal.
pub quoting: bool,
/// True while inside a comment.
pub commenting: bool,
/// True while the digits of a numeric literal are being accumulated.
pub numbering: bool,
/// True when the next character should be taken verbatim (set after a backslash).
pub escape_next: bool,
}

View file

@ -44,7 +44,7 @@ fn main() -> Result<(), Box<dyn Error>> {
Ok(lexed) => lexed,
};
dbg!(lexed.len());
dbg!(lexed.len(), lexed);
Ok(())
}