lexer refactor (and first working version!)
This commit is contained in:
parent
82e43a271e
commit
821073e88a
|
@ -1,156 +1,152 @@
|
||||||
use crate::lexutil;
|
use crate::lexutil;
|
||||||
use lexutil::{Bracket, Literal, Statement, ArithOperator, Token};
|
use lexutil::{ArithOperator, Bracket, Literal, Statement, Token};
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum State {
|
||||||
|
Stringing,
|
||||||
|
Commenting,
|
||||||
|
Numbering,
|
||||||
|
BuildingToken,
|
||||||
|
}
|
||||||
|
|
||||||
pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> {
|
pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> {
|
||||||
println!("Lexing!");
|
println!("Lexing!");
|
||||||
|
|
||||||
let mut state = lexutil::LexerMachine {
|
let mut current_token = String::new();
|
||||||
current_token: String::new(),
|
let mut lexed = Vec::new();
|
||||||
lexed: Vec::new(),
|
let mut state: State = State::BuildingToken;
|
||||||
quoting: false,
|
|
||||||
commenting: false,
|
|
||||||
numbering: false,
|
|
||||||
escape_next: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
for (i, c) in text_source.chars().enumerate() {
|
let mut chars = text_source.chars().peekable();
|
||||||
dbg!("Begin", &c, &state);
|
|
||||||
|
|
||||||
// Commenting end
|
while let Some(c) = chars.next() {
|
||||||
if state.commenting && c == '\n' {
|
match state {
|
||||||
state.commenting = false;
|
State::Commenting => {
|
||||||
continue;
|
// Stop commenting at end of line
|
||||||
}
|
|
||||||
|
|
||||||
// Commenting continue
|
|
||||||
if state.commenting {
|
|
||||||
if c == '\n' {
|
if c == '\n' {
|
||||||
state.commenting = false;
|
state = State::BuildingToken;
|
||||||
}
|
}
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
State::Stringing => {
|
||||||
// Stringing begin/end
|
// If next char is an unescaped quote
|
||||||
if c == '\"' && !state.escape_next {
|
if let Some(c_peek) = chars.peek() {
|
||||||
if state.quoting {
|
if c != '\\' && *c_peek == '\"' {
|
||||||
let tok_cpy = state.current_token.clone();
|
dbg!("hi");
|
||||||
state.lexed.push(Token::Literal(Literal::Str(tok_cpy)));
|
chars.next();
|
||||||
state.current_token = String::new();
|
current_token.push(c);
|
||||||
state.quoting = false;
|
let tok_cpy = current_token.clone();
|
||||||
|
lexed.push(Token::Literal(Literal::Str(tok_cpy)));
|
||||||
|
state = State::BuildingToken;
|
||||||
|
current_token = String::new();
|
||||||
} else {
|
} else {
|
||||||
state.current_token = String::new();
|
current_token.push(c);
|
||||||
state.quoting = true;
|
|
||||||
}
|
}
|
||||||
continue;
|
} else {
|
||||||
} else if state.escape_next {
|
dbg!("h");
|
||||||
state.current_token.push(c);
|
continue; // we're at the end. we should bring a user error
|
||||||
state.escape_next = false;
|
// because this string was not properly delimited
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stringing continue
|
|
||||||
if state.quoting {
|
|
||||||
if c == '\\' {
|
|
||||||
state.escape_next = true;
|
|
||||||
}
|
}
|
||||||
state.current_token.push(c);
|
State::Numbering => {
|
||||||
continue;
|
// If next char isn't numeric, is at end of this number literal
|
||||||
}
|
if let Some(c_peek) = chars.peek() {
|
||||||
|
current_token.push(c);
|
||||||
if c.is_ascii_digit() {
|
if !c_peek.is_ascii_digit() {
|
||||||
if !state.numbering {
|
lexed.push(Token::Literal(Literal::Num(
|
||||||
state.numbering = true;
|
// if this unwrap fails, we've failed
|
||||||
}
|
// to confirm that this is a number literal
|
||||||
} else if state.numbering && !c.is_ascii_digit() {
|
current_token.parse::<f64>().unwrap(),
|
||||||
state.lexed.push(Token::Literal(Literal::Num(
|
|
||||||
state.current_token.parse::<f64>().unwrap(),
|
|
||||||
)));
|
)));
|
||||||
state.current_token = String::new();
|
state = State::BuildingToken;
|
||||||
state.numbering = false;
|
current_token = String::new();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
continue; // we're at the end. not a problem because
|
||||||
|
// numbers self-terminate
|
||||||
|
}
|
||||||
|
}
|
||||||
|
State::BuildingToken => {
|
||||||
|
if c == '\"' {
|
||||||
|
state = State::Stringing;
|
||||||
|
current_token = String::new();
|
||||||
|
// We don't need to push c because it's the quote delimiter,
|
||||||
|
// which has already served its purpose as an indicator
|
||||||
|
continue;
|
||||||
|
} else if c.is_ascii_digit() {
|
||||||
|
state = State::Numbering;
|
||||||
|
current_token = c.to_string();
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Known meaningful tokens
|
// Known meaningful tokens
|
||||||
match state.current_token.as_str() {
|
current_token.push(c);
|
||||||
|
match current_token.as_str() {
|
||||||
"\n" => {
|
"\n" => {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
"#" => {
|
"#" => {
|
||||||
state.commenting = true;
|
state = State::Commenting;
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
";" => {
|
";" => {
|
||||||
state.lexed.push(Token::Statement(Statement::Terminator));
|
lexed.push(Token::Statement(Statement::Terminator));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
}
|
}
|
||||||
"if " => {
|
"if " => {
|
||||||
state.lexed.push(Token::Statement(Statement::Conditional));
|
lexed.push(Token::Statement(Statement::Conditional));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"to " => {
|
"to " => {
|
||||||
state.lexed.push(Token::Statement(Statement::FunctionDef));
|
lexed.push(Token::Statement(Statement::FunctionDef));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"for " => {
|
"for " => {
|
||||||
state.lexed.push(Token::Statement(Statement::ForLoop));
|
lexed.push(Token::Statement(Statement::ForLoop));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"while " => {
|
"while " => {
|
||||||
state.lexed.push(Token::Statement(Statement::WhileLoop));
|
lexed.push(Token::Statement(Statement::WhileLoop));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"(" => {t w
|
"(" => {
|
||||||
state.lexed.push(Token::Bracket(Bracket::Open));
|
lexed.push(Token::Bracket(Bracket::Open));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
")" => {
|
")" => {
|
||||||
state.lexed.push(Token::Bracket(Bracket::Close));
|
lexed.push(Token::Bracket(Bracket::Close));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"*" => {
|
"*" => {
|
||||||
state.lexed.push(Token::ArithOperator(ArithOperator::Multiply));
|
lexed.push(Token::ArithOperator(ArithOperator::Multiply));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"/" => {
|
"/" => {
|
||||||
state.lexed.push(Token::ArithOperator(ArithOperator::Divide));
|
lexed.push(Token::ArithOperator(ArithOperator::Divide));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"+" => {
|
"+" => {
|
||||||
state.lexed.push(Token::ArithOperator(ArithOperator::Add));
|
lexed.push(Token::ArithOperator(ArithOperator::Add));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"-" => {
|
"-" => {
|
||||||
state.lexed.push(Token::ArithOperator(ArithOperator::Subtract));
|
lexed.push(Token::ArithOperator(ArithOperator::Subtract));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"^" => {
|
"^" => {
|
||||||
state.lexed.push(Token::ArithOperator(ArithOperator::Exponentiate));
|
lexed.push(Token::ArithOperator(ArithOperator::Exponentiate));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
"%" => {
|
"%" => {
|
||||||
state.lexed.push(Token::ArithOperator(ArithOperator::Reduce));
|
lexed.push(Token::ArithOperator(ArithOperator::Reduce));
|
||||||
state.current_token = String::new();
|
current_token = String::new();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
&_ => {}
|
&_ => {}
|
||||||
}
|
}
|
||||||
state.current_token.push(c);
|
}
|
||||||
dbg!("End", &c, &state);
|
}
|
||||||
|
dbg!(&c, &state, &current_token, &lexed);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(state.lexed)
|
Ok(lexed)
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,13 +47,3 @@ pub enum Token {
|
||||||
Bracket(Bracket),
|
Bracket(Bracket),
|
||||||
Variable(variables::Variable),
|
Variable(variables::Variable),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct LexerMachine {
|
|
||||||
pub current_token: String,
|
|
||||||
pub lexed: Vec<Token>,
|
|
||||||
pub quoting: bool,
|
|
||||||
pub commenting: bool,
|
|
||||||
pub numbering: bool,
|
|
||||||
pub escape_next: bool,
|
|
||||||
}
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||||
Ok(lexed) => lexed,
|
Ok(lexed) => lexed,
|
||||||
};
|
};
|
||||||
|
|
||||||
dbg!(lexed.len());
|
dbg!(lexed.len(), lexed);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue