more progress on lexer
This commit is contained in:
parent
b75812aa59
commit
227cddf0d4
|
@ -1,45 +1,107 @@
|
|||
use crate::lexutil;
|
||||
use lexutil::{Token, StatementToken};
|
||||
use lexutil::{Bracket, Literal, Statement, Token};
|
||||
use std::error::Error;
|
||||
|
||||
pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> {
|
||||
println!("Lexing!");
|
||||
let mut lexed: Vec<Token> = Vec::new();
|
||||
|
||||
let mut current_token = String::new();
|
||||
let mut state = lexutil::LexerMachine {
|
||||
current_token: String::new(),
|
||||
lexed: Vec::new(),
|
||||
quoting: false,
|
||||
commenting: false,
|
||||
numbering: false,
|
||||
};
|
||||
|
||||
for (i, c) in text_source.chars().enumerate() {
|
||||
current_token.push(c);
|
||||
dbg!("{} into {}", &c, &state);
|
||||
|
||||
if state.commenting && c == '\n' {
|
||||
state.commenting = false;
|
||||
}
|
||||
|
||||
// TODO: descriptive error
|
||||
if c == '\"' && state.current_token.clone().pop().ok_or(panic!()) != Ok('\\') {
|
||||
if state.quoting {
|
||||
let mut tok_cpy = state.current_token.clone();
|
||||
tok_cpy.pop();
|
||||
state.lexed.push(Token::Literal(Literal::Str(tok_cpy)));
|
||||
state.current_token = String::new();
|
||||
state.quoting = false;
|
||||
} else {
|
||||
state.current_token = String::new();
|
||||
state.quoting = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if state.commenting {
|
||||
continue;
|
||||
}
|
||||
|
||||
if state.quoting {
|
||||
state.current_token.push(c);
|
||||
}
|
||||
|
||||
if c.is_ascii_digit() {
|
||||
if !state.numbering {
|
||||
state.numbering = true;
|
||||
}
|
||||
} else if state.numbering && !c.is_ascii_digit() {
|
||||
state.lexed.push(Token::Literal(Literal::Num(state.current_token.parse::<f64>().unwrap())));
|
||||
state.current_token = String::new();
|
||||
state.numbering = false;
|
||||
}
|
||||
|
||||
state.current_token.push(c);
|
||||
|
||||
// Known meaningful tokens
|
||||
match current_token.as_str() {
|
||||
match state.current_token.as_str() {
|
||||
"\n" => {
|
||||
continue;
|
||||
}
|
||||
"#" => {
|
||||
state.commenting = true;
|
||||
continue;
|
||||
}
|
||||
";" => {
|
||||
lexed.push(Token::Statement(StatementToken::Terminator));
|
||||
current_token = String::new();
|
||||
state.lexed.push(Token::Statement(Statement::Terminator));
|
||||
state.current_token = String::new();
|
||||
}
|
||||
"if " => {
|
||||
lexed.push(Token::Statement(StatementToken::Conditional));
|
||||
current_token = String::new();
|
||||
state.lexed.push(Token::Statement(Statement::Conditional));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"to " => {
|
||||
lexed.push(Token::Statement(StatementToken::FunctionDef));
|
||||
current_token = String::new();
|
||||
state.lexed.push(Token::Statement(Statement::FunctionDef));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"for " => {
|
||||
lexed.push(Token::Statement(StatementToken::ForLoop));
|
||||
current_token = String::new();
|
||||
state.lexed.push(Token::Statement(Statement::ForLoop));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"while " => {
|
||||
lexed.push(Token::Statement(StatementToken::WhileLoop));
|
||||
current_token = String::new();
|
||||
state.lexed.push(Token::Statement(Statement::WhileLoop));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
"(" => {
|
||||
state.lexed.push(Token::Bracket(Bracket::Open));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
")" => {
|
||||
state.lexed.push(Token::Bracket(Bracket::Close));
|
||||
state.current_token = String::new();
|
||||
continue;
|
||||
}
|
||||
|
||||
&_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(lexed)
|
||||
Ok(state.lexed)
|
||||
}
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use crate::variables;
|
||||
|
||||
|
||||
// parts of Token
|
||||
|
||||
pub enum ArithmeticOperatorToken {
|
||||
#[derive(Debug)]
|
||||
pub enum ArithmeticOperator {
|
||||
Add,
|
||||
Subtract,
|
||||
Multiply,
|
||||
|
@ -12,17 +12,20 @@ pub enum ArithmeticOperatorToken {
|
|||
Modulus,
|
||||
}
|
||||
|
||||
/// Boolean operator kinds.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BooleanOperator {
    /// The `And` operator.
    And,
    /// The `Or` operator.
    Or,
}
|
||||
|
||||
/// A literal value.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    /// A string literal.
    Str(String),
    /// A numeric literal, stored as `f64`.
    Num(f64),
}
|
||||
|
||||
pub enum StatementToken {
|
||||
#[derive(Debug)]
|
||||
pub enum Statement {
|
||||
Conditional,
|
||||
ForLoop,
|
||||
WhileLoop,
|
||||
|
@ -30,17 +33,26 @@ pub enum StatementToken {
|
|||
Terminator,
|
||||
}
|
||||
|
||||
/// Bracket kinds.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Bracket {
    /// An opening bracket.
    Open,
    /// A closing bracket.
    Close,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Token {
|
||||
Literal(LiteralToken),
|
||||
ArithmeticOperator(ArithmeticOperatorToken),
|
||||
Statement(StatementToken),
|
||||
Bracket(BracketToken),
|
||||
Variable(variables::VariableToken),
|
||||
Literal(Literal),
|
||||
ArithmeticOperator(ArithmeticOperator),
|
||||
Statement(Statement),
|
||||
Bracket(Bracket),
|
||||
Variable(variables::Variable),
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LexerMachine {
|
||||
pub current_token: String,
|
||||
pub lexed: Vec<Token>,
|
||||
pub quoting: bool,
|
||||
pub commenting: bool,
|
||||
pub numbering: bool,
|
||||
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
use lexutil::Token;
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read};
|
||||
use lexutil::Token;
|
||||
|
||||
mod lexer;
|
||||
mod lexutil;
|
||||
|
@ -38,14 +38,13 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||
// Lex!
|
||||
let lexed: Vec<Token> = match lexer::lexer(&text_source) {
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"Lexer fail with {}",
|
||||
e
|
||||
);
|
||||
eprintln!("Lexer fail with {}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
Ok(lexed) => lexed
|
||||
Ok(lexed) => lexed,
|
||||
};
|
||||
|
||||
dbg!("Lexed length {}", lexed.len());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
/// A thruster module. It carries no data yet.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Thruster {}
|
||||
|
||||
pub enum ModuleVar {
|
||||
#[derive(Debug)]
|
||||
pub enum Module {
|
||||
Thruster(Thruster),
|
||||
}
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
"string lit";
|
||||
("string lit");# comment
|
||||
12345*(60+80);
|
|
@ -1,7 +1,8 @@
|
|||
use crate::modules;
|
||||
|
||||
pub enum VariableToken {
|
||||
Module(modules::ModuleVar),
|
||||
#[derive(Debug)]
|
||||
pub enum Variable {
|
||||
Module(modules::Module),
|
||||
Num(f64),
|
||||
Str(String),
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue