more progress on lexer
This commit is contained in:
parent
b75812aa59
commit
227cddf0d4
|
@ -1,45 +1,107 @@
|
||||||
use crate::lexutil;
|
use crate::lexutil;
|
||||||
use lexutil::{Token, StatementToken};
|
use lexutil::{Bracket, Literal, Statement, Token};
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
|
||||||
pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> {
|
pub fn lexer(text_source: &str) -> Result<Vec<Token>, Box<dyn Error>> {
|
||||||
println!("Lexing!");
|
println!("Lexing!");
|
||||||
let mut lexed: Vec<Token> = Vec::new();
|
|
||||||
|
|
||||||
let mut current_token = String::new();
|
let mut state = lexutil::LexerMachine {
|
||||||
|
current_token: String::new(),
|
||||||
|
lexed: Vec::new(),
|
||||||
|
quoting: false,
|
||||||
|
commenting: false,
|
||||||
|
numbering: false,
|
||||||
|
};
|
||||||
|
|
||||||
for (i, c) in text_source.chars().enumerate() {
|
for (i, c) in text_source.chars().enumerate() {
|
||||||
current_token.push(c);
|
dbg!("{} into {}", &c, &state);
|
||||||
|
|
||||||
|
if state.commenting && c == '\n' {
|
||||||
|
state.commenting = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: descriptive error
|
||||||
|
if c == '\"' && state.current_token.clone().pop().ok_or(panic!()) != Ok('\\') {
|
||||||
|
if state.quoting {
|
||||||
|
let mut tok_cpy = state.current_token.clone();
|
||||||
|
tok_cpy.pop();
|
||||||
|
state.lexed.push(Token::Literal(Literal::Str(tok_cpy)));
|
||||||
|
state.current_token = String::new();
|
||||||
|
state.quoting = false;
|
||||||
|
} else {
|
||||||
|
state.current_token = String::new();
|
||||||
|
state.quoting = true;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if state.commenting {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if state.quoting {
|
||||||
|
state.current_token.push(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.is_ascii_digit() {
|
||||||
|
if !state.numbering {
|
||||||
|
state.numbering = true;
|
||||||
|
}
|
||||||
|
} else if state.numbering && !c.is_ascii_digit() {
|
||||||
|
state.lexed.push(Token::Literal(Literal::Num(state.current_token.parse::<f64>().unwrap())));
|
||||||
|
state.current_token = String::new();
|
||||||
|
state.numbering = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
state.current_token.push(c);
|
||||||
|
|
||||||
// Known meaningful tokens
|
// Known meaningful tokens
|
||||||
match current_token.as_str() {
|
match state.current_token.as_str() {
|
||||||
|
"\n" => {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
"#" => {
|
||||||
|
state.commenting = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
";" => {
|
";" => {
|
||||||
lexed.push(Token::Statement(StatementToken::Terminator));
|
state.lexed.push(Token::Statement(Statement::Terminator));
|
||||||
current_token = String::new();
|
state.current_token = String::new();
|
||||||
}
|
}
|
||||||
"if " => {
|
"if " => {
|
||||||
lexed.push(Token::Statement(StatementToken::Conditional));
|
state.lexed.push(Token::Statement(Statement::Conditional));
|
||||||
current_token = String::new();
|
state.current_token = String::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
"to " => {
|
"to " => {
|
||||||
lexed.push(Token::Statement(StatementToken::FunctionDef));
|
state.lexed.push(Token::Statement(Statement::FunctionDef));
|
||||||
current_token = String::new();
|
state.current_token = String::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
"for " => {
|
"for " => {
|
||||||
lexed.push(Token::Statement(StatementToken::ForLoop));
|
state.lexed.push(Token::Statement(Statement::ForLoop));
|
||||||
current_token = String::new();
|
state.current_token = String::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
"while " => {
|
"while " => {
|
||||||
lexed.push(Token::Statement(StatementToken::WhileLoop));
|
state.lexed.push(Token::Statement(Statement::WhileLoop));
|
||||||
current_token = String::new();
|
state.current_token = String::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
"(" => {
|
||||||
|
state.lexed.push(Token::Bracket(Bracket::Open));
|
||||||
|
state.current_token = String::new();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
")" => {
|
||||||
|
state.lexed.push(Token::Bracket(Bracket::Close));
|
||||||
|
state.current_token = String::new();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
&_ => {}
|
&_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(lexed)
|
Ok(state.lexed)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
use crate::variables;
|
use crate::variables;
|
||||||
|
|
||||||
|
|
||||||
// parts of Token
|
// parts of Token
|
||||||
|
|
||||||
pub enum ArithmeticOperatorToken {
|
#[derive(Debug)]
|
||||||
|
pub enum ArithmeticOperator {
|
||||||
Add,
|
Add,
|
||||||
Subtract,
|
Subtract,
|
||||||
Multiply,
|
Multiply,
|
||||||
|
@ -12,17 +12,20 @@ pub enum ArithmeticOperatorToken {
|
||||||
Modulus,
|
Modulus,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Logical connectives.
///
/// The type is a fieldless enum, so it is cheap to copy and can be compared
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BooleanOperator {
    /// Logical conjunction.
    And,
    /// Logical disjunction.
    Or,
}
|
||||||
|
|
||||||
/// A literal value written directly in the source text.
///
/// `Eq` is not derived because `f64` does not implement it.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    /// A string literal.
    Str(String),
    /// A numeric literal.
    Num(f64),
}
|
||||||
|
|
||||||
pub enum StatementToken {
|
#[derive(Debug)]
|
||||||
|
pub enum Statement {
|
||||||
Conditional,
|
Conditional,
|
||||||
ForLoop,
|
ForLoop,
|
||||||
WhileLoop,
|
WhileLoop,
|
||||||
|
@ -30,17 +33,26 @@ pub enum StatementToken {
|
||||||
Terminator,
|
Terminator,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The two bracket kinds.
///
/// The type is a fieldless enum, so it is cheap to copy and can be compared
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Bracket {
    /// An opening bracket.
    Open,
    /// A closing bracket.
    Close,
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub enum Token {
|
pub enum Token {
|
||||||
Literal(LiteralToken),
|
Literal(Literal),
|
||||||
ArithmeticOperator(ArithmeticOperatorToken),
|
ArithmeticOperator(ArithmeticOperator),
|
||||||
Statement(StatementToken),
|
Statement(Statement),
|
||||||
Bracket(BracketToken),
|
Bracket(Bracket),
|
||||||
Variable(variables::VariableToken),
|
Variable(variables::Variable),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct LexerMachine {
|
||||||
|
pub current_token: String,
|
||||||
|
pub lexed: Vec<Token>,
|
||||||
|
pub quoting: bool,
|
||||||
|
pub commenting: bool,
|
||||||
|
pub numbering: bool,
|
||||||
|
}
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
|
use lexutil::Token;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, Read};
|
use std::io::{self, Read};
|
||||||
use lexutil::Token;
|
|
||||||
|
|
||||||
mod lexer;
|
mod lexer;
|
||||||
mod lexutil;
|
mod lexutil;
|
||||||
|
@ -38,14 +38,13 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||||
// Lex!
|
// Lex!
|
||||||
let lexed: Vec<Token> = match lexer::lexer(&text_source) {
|
let lexed: Vec<Token> = match lexer::lexer(&text_source) {
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
eprintln!(
|
eprintln!("Lexer fail with {}", e);
|
||||||
"Lexer fail with {}",
|
|
||||||
e
|
|
||||||
);
|
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
Ok(lexed) => lexed
|
Ok(lexed) => lexed,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
dbg!("Lexed length {}", lexed.len());
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
/// A thruster module. It carries no data yet.
#[derive(Debug, Clone, PartialEq)]
pub struct Thruster {}

/// The kinds of module a value can refer to.
#[derive(Debug, Clone, PartialEq)]
pub enum Module {
    /// A thruster module.
    Thruster(Thruster),
}
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
"string lit";
|
||||||
|
("string lit");# comment
|
||||||
|
12345*(60+80);
|
|
@ -1,7 +1,8 @@
|
||||||
use crate::modules;
|
use crate::modules;
|
||||||
|
|
||||||
pub enum VariableToken {
|
#[derive(Debug)]
|
||||||
Module(modules::ModuleVar),
|
pub enum Variable {
|
||||||
|
Module(modules::Module),
|
||||||
Num(f64),
|
Num(f64),
|
||||||
Str(String),
|
Str(String),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue