lexer refactor (and first working version!)
This commit is contained in:
parent
82e43a271e
commit
821073e88a
3 changed files with 141 additions and 155 deletions
|
@ -1,156 +1,152 @@
|
|||
use crate::lexutil;
|
||||
use lexutil::{Bracket, Literal, Statement, ArithOperator, Token};
|
||||
use lexutil::{ArithOperator, Bracket, Literal, Statement, Token};
|
||||
use std::error::Error;
|
||||
|
||||
#[derive(Debug)]
|
||||
enum State {
|
||||
Stringing,
|
||||
Commenting,
|
||||
Numbering,
|
||||
BuildingToken,
|
||||
}
|
||||
|
||||
pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> {
|
||||
println!("Lexing!");
|
||||
|
||||
let mut state = lexutil::LexerMachine {
|
||||
current_token: String::new(),
|
||||
lexed: Vec::new(),
|
||||
quoting: false,
|
||||
commenting: false,
|
||||
numbering: false,
|
||||
escape_next: false,
|
||||
};
|
||||
let mut current_token = String::new();
|
||||
let mut lexed = Vec::new();
|
||||
let mut state: State = State::BuildingToken;
|
||||
|
||||
for (i, c) in text_source.chars().enumerate() {
|
||||
dbg!("Begin", &c, &state);
|
||||
let mut chars = text_source.chars().peekable();
|
||||
|
||||
// Commenting end
|
||||
if state.commenting && c == '\n' {
|
||||
state.commenting = false;
|
||||
continue;
|
||||
while let Some(c) = chars.next() {
|
||||
match state {
|
||||
State::Commenting => {
|
||||
// Stop commenting at end of line
|
||||
if c == '\n' {
|
||||
state = State::BuildingToken;
|
||||
}
|
||||
}
|
||||
State::Stringing => {
|
||||
// If next char is an unescaped quote
|
||||
if let Some(c_peek) = chars.peek() {
|
||||
if c != '\\' && *c_peek == '\"' {
|
||||
dbg!("hi");
|
||||
chars.next();
|
||||
current_token.push(c);
|
||||
let tok_cpy = current_token.clone();
|
||||
lexed.push(Token::Literal(Literal::Str(tok_cpy)));
|
||||
state = State::BuildingToken;
|
||||
current_token = String::new();
|
||||
} else {
|
||||
current_token.push(c);
|
||||
}
|
||||
} else {
|
||||
dbg!("h");
|
||||
continue; // we're at the end. we should bring a user error
|
||||
// because this string was not properly delimited
|
||||
}
|
||||
}
|
||||
State::Numbering => {
|
||||
// If next char isn't numeric, is at end of this number literal
|
||||
if let Some(c_peek) = chars.peek() {
|
||||
current_token.push(c);
|
||||
if !c_peek.is_ascii_digit() {
|
||||
lexed.push(Token::Literal(Literal::Num(
|
||||
// if this unwrap fails, we've failed
|
||||
// to confirm that this is a number literal
|
||||
current_token.parse::<f64>().unwrap(),
|
||||
)));
|
||||
state = State::BuildingToken;
|
||||
current_token = String::new();
|
||||
}
|
||||
} else {
|
||||
continue; // we're at the end. not a problem because
|
||||
// numbers self-terminate
|
||||
}
|
||||
}
|
||||
State::BuildingToken => {
|
||||
if c == '\"' {
|
||||
state = State::Stringing;
|
||||
current_token = String::new();
|
||||
// We don't need to push c because it's the quote delimiter,
|
||||
// which has already served its purpose as an indicator
|
||||
continue;
|
||||
} else if c.is_ascii_digit() {
|
||||
state = State::Numbering;
|
||||
current_token = c.to_string();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Known meaningful tokens
|
||||
current_token.push(c);
|
||||
match current_token.as_str() {
|
||||
"\n" => {
|
||||
continue;
|
||||
}
|
||||
"#" => {
|
||||
state = State::Commenting;
|
||||
current_token = String::new();
|
||||
}
|
||||
";" => {
|
||||
lexed.push(Token::Statement(Statement::Terminator));
|
||||
current_token = String::new();
|
||||
}
|
||||
"if " => {
|
||||
lexed.push(Token::Statement(Statement::Conditional));
|
||||
current_token = String::new();
|
||||
}
|
||||
"to " => {
|
||||
lexed.push(Token::Statement(Statement::FunctionDef));
|
||||
current_token = String::new();
|
||||
}
|
||||
"for " => {
|
||||
lexed.push(Token::Statement(Statement::ForLoop));
|
||||
current_token = String::new();
|
||||
}
|
||||
"while " => {
|
||||
lexed.push(Token::Statement(Statement::WhileLoop));
|
||||
current_token = String::new();
|
||||
}
|
||||
"(" => {
|
||||
lexed.push(Token::Bracket(Bracket::Open));
|
||||
current_token = String::new();
|
||||
}
|
||||
")" => {
|
||||
lexed.push(Token::Bracket(Bracket::Close));
|
||||
current_token = String::new();
|
||||
}
|
||||
"*" => {
|
||||
lexed.push(Token::ArithOperator(ArithOperator::Multiply));
|
||||
current_token = String::new();
|
||||
}
|
||||
"/" => {
|
||||
lexed.push(Token::ArithOperator(ArithOperator::Divide));
|
||||
current_token = String::new();
|
||||
}
|
||||
"+" => {
|
||||
lexed.push(Token::ArithOperator(ArithOperator::Add));
|
||||
current_token = String::new();
|
||||
}
|
||||
"-" => {
|
||||
lexed.push(Token::ArithOperator(ArithOperator::Subtract));
|
||||
current_token = String::new();
|
||||
}
|
||||
"^" => {
|
||||
lexed.push(Token::ArithOperator(ArithOperator::Exponentiate));
|
||||
current_token = String::new();
|
||||
}
|
||||
"%" => {
|
||||
lexed.push(Token::ArithOperator(ArithOperator::Reduce));
|
||||
current_token = String::new();
|
||||
}
|
||||
|
||||
&_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Commenting continue
|
||||
if state.commenting {
|
||||
if c == '\n' {
|
||||
state.commenting = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Stringing begin/end
|
||||
if c == '\"' && !state.escape_next {
|
||||
if state.quoting {
|
||||
let tok_cpy = state.current_token.clone();
|
||||
state.lexed.push(Token::Literal(Literal::Str(tok_cpy)));
|
||||
state.current_token = String::new();
|
||||
state.quoting = false;
|
||||
} else {
|
||||
state.current_token = String::new();
|
||||
state.quoting = true;
|
||||
}
|
||||
continue;
|
||||
} else if state.escape_next {
|
||||
state.current_token.push(c);
|
||||
state.escape_next = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Stringing continue
|
||||
if state.quoting {
|
||||
if c == '\\' {
|
||||
state.escape_next = true;
|
||||
}
|
||||
state.current_token.push(c);
|
||||
continue;
|
||||
}
|
||||
|
||||
if c.is_ascii_digit() {
|
||||
if !state.numbering {
|
||||
state.numbering = true;
|
||||
}
|
||||
} else if state.numbering && !c.is_ascii_digit() {
|
||||
state.lexed.push(Token::Literal(Literal::Num(
|
||||
state.current_token.parse::<f64>().unwrap(),
|
||||
)));
|
||||
state.current_token = String::new();
|
||||
state.numbering = false;
|
||||
}
|
||||
|
||||
// Known meaningful tokens
|
||||
match state.current_token.as_str() {
|
||||
"\n" => {
|
||||
continue;
|
||||
}
|
||||
"#" => {
|
||||
state.commenting = true;
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
";" => {
|
||||
state.lexed.push(Token::Statement(Statement::Terminator));
|
||||
state.current_token = String::new();
|
||||
}
|
||||
"if " => {
|
||||
state.lexed.push(Token::Statement(Statement::Conditional));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"to " => {
|
||||
state.lexed.push(Token::Statement(Statement::FunctionDef));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"for " => {
|
||||
state.lexed.push(Token::Statement(Statement::ForLoop));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"while " => {
|
||||
state.lexed.push(Token::Statement(Statement::WhileLoop));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"(" => {t w
|
||||
state.lexed.push(Token::Bracket(Bracket::Open));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
")" => {
|
||||
state.lexed.push(Token::Bracket(Bracket::Close));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"*" => {
|
||||
state.lexed.push(Token::ArithOperator(ArithOperator::Multiply));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"/" => {
|
||||
state.lexed.push(Token::ArithOperator(ArithOperator::Divide));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"+" => {
|
||||
state.lexed.push(Token::ArithOperator(ArithOperator::Add));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"-" => {
|
||||
state.lexed.push(Token::ArithOperator(ArithOperator::Subtract));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"^" => {
|
||||
state.lexed.push(Token::ArithOperator(ArithOperator::Exponentiate));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"%" => {
|
||||
state.lexed.push(Token::ArithOperator(ArithOperator::Reduce));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
&_ => {}
|
||||
}
|
||||
state.current_token.push(c);
|
||||
dbg!("End", &c, &state);
|
||||
dbg!(&c, &state, &current_token, &lexed);
|
||||
}
|
||||
|
||||
Ok(state.lexed)
|
||||
Ok(lexed)
|
||||
}
|
||||
|
|
|
@ -47,13 +47,3 @@ pub enum Token {
|
|||
Bracket(Bracket),
|
||||
Variable(variables::Variable),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LexerMachine {
|
||||
pub current_token: String,
|
||||
pub lexed: Vec<Token>,
|
||||
pub quoting: bool,
|
||||
pub commenting: bool,
|
||||
pub numbering: bool,
|
||||
pub escape_next: bool,
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||
Ok(lexed) => lexed,
|
||||
};
|
||||
|
||||
dbg!(lexed.len());
|
||||
dbg!(lexed.len(), lexed);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue