From 7fdc97b6194aca1c7249812ee63d74e84efadf95 Mon Sep 17 00:00:00 2001 From: core Date: Sun, 21 Jan 2024 01:30:19 -0500 Subject: [PATCH] lexer v2 --- .idea/kabel.iml | 2 + kabel/src/main.rs | 55 ++-- libkabel/src/diagnostics/emitters/basic.rs | 7 +- libkabel/src/diagnostics/emitters/mod.rs | 4 +- libkabel/src/diagnostics/emitters/pretty.rs | 109 +++++-- libkabel/src/diagnostics/emitters/util.rs | 20 -- libkabel/src/diagnostics/macros.rs | 111 +++++++ libkabel/src/diagnostics/mod.rs | 28 +- libkabel/src/diagnostics/span.rs | 4 +- libkabel/src/error.rs | 5 +- libkabel/src/formatter.rs | 11 + libkabel/src/lexer/mod.rs | 327 ++++++++------------ libkabel/src/lexer/token.rs | 111 ++++--- libkabel/src/lib.rs | 8 +- libkabel/src/source.rs | 76 +++++ 15 files changed, 558 insertions(+), 320 deletions(-) delete mode 100644 libkabel/src/diagnostics/emitters/util.rs create mode 100644 libkabel/src/diagnostics/macros.rs create mode 100644 libkabel/src/formatter.rs create mode 100644 libkabel/src/source.rs diff --git a/.idea/kabel.iml b/.idea/kabel.iml index eaf88de..03258df 100644 --- a/.idea/kabel.iml +++ b/.idea/kabel.iml @@ -2,8 +2,10 @@ + + diff --git a/kabel/src/main.rs b/kabel/src/main.rs index f56a743..642b780 100644 --- a/kabel/src/main.rs +++ b/kabel/src/main.rs @@ -1,11 +1,12 @@ -use std::env; -use std::error::Error; -use std::fs::File; -use std::io::{self, Read}; +use colored::Colorize; use libkabel::diagnostics::emitters::Emitter; use libkabel::error::KError; +use libkabel::formatter::print_tts; use libkabel::lexer::token::Token; -use colored::Colorize; +use libkabel::source::SourceFile; +use std::error::Error; +use std::io; +use std::{env, fs}; fn main() -> Result<(), Box> { let argv: Vec = env::args().collect(); @@ -14,13 +15,13 @@ fn main() -> Result<(), Box> { std::process::exit(1); } - let mut source_fd = match File::open(&argv[1]) { + let text_source = match fs::read_to_string(&argv[1]) { Err(e) if e.kind() == io::ErrorKind::NotFound => { std::process::exit(1); } Err(e) => { eprintln!( - "error: Tried opening file `{}' and got unexpected error: {}", + "error: Tried reading file `{}' and got unexpected error: {}", argv[1], e.kind() ); @@ -31,26 +32,42 @@ fn main() -> Result<(), Box> { tracing_subscriber::fmt::init(); - let mut text_source = String::new(); - source_fd.read_to_string(&mut text_source)?; + let source = SourceFile::new(text_source, argv[1].clone()); // Lex! - let lexed: Vec = match libkabel::lexer::lexer(&text_source) { + let lexed: Vec = match libkabel::lexer::lexer(source) { Err(e) => { match e { KError::InternalError(e) => { - eprintln!("{} {}", - "error: Internal Kabel error!".red(), - "THIS IS NOT YOUR FAULT.".bold().red()); - eprintln!("{}", "error: Please report this error to the kabel developers along with your".red()); + eprintln!( + "{} {}", + "error: Internal Kabel error!".red(), + "THIS IS NOT YOUR FAULT.".bold().red() + ); + eprintln!( + "{}", + "error: Please report this error to the kabel developers along with your" + .red() + ); eprintln!("{}", "fail: program's complete source code. Either"); - eprintln!("{}", "fail: - Open an Issue at https://git.e3t.cc/tm85/kabel, or".red()); - eprintln!("{}", "fail: - E-Mail the developers at kabel@e3t.cc".red()); + eprintln!( + "{}", + "fail: - Open an Issue at https://git.e3t.cc/tm85/kabel, or".red() + ); + eprintln!( + "{}", + "fail: - E-Mail the developers at kabel@e3t.cc".red() + ); eprintln!("{} {:?}", "fail: Error message follows:".red(), e); } - KError::UserError(diags) => { + KError::UserError(diags, src_map) => { //eprintln!("{}", libkabel::diagnostics::emitters::basic::BasicEmitter::emit(diags, text_source)); - eprintln!("{}", libkabel::diagnostics::emitters::pretty::PrettyEmitter::emit(diags, text_source, argv[1].clone())); + eprintln!( + "{}", + libkabel::diagnostics::emitters::pretty::PrettyEmitter::emit( + diags, src_map + ) + ); } } std::process::exit(1); @@ -58,7 +75,7 @@ fn main() -> Result<(), Box> { Ok(lexed) => lexed, }; - println!("{:#?}", lexed); + print_tts(&lexed); Ok(()) } diff --git a/libkabel/src/diagnostics/emitters/basic.rs b/libkabel/src/diagnostics/emitters/basic.rs index 73dddd2..397545e 100644 --- a/libkabel/src/diagnostics/emitters/basic.rs +++ b/libkabel/src/diagnostics/emitters/basic.rs @@ -1,5 +1,6 @@ -use crate::diagnostics::Diagnostic; use crate::diagnostics::emitters::Emitter; +use crate::diagnostics::Diagnostic; +use crate::source::SourceFile; use std::fmt::Write; pub struct BasicEmitter; @@ -7,7 +8,7 @@ pub struct BasicEmitter; impl Emitter for BasicEmitter { type Output = String; - fn emit(diag: Vec, _source: String, _source_name: String) -> Self::Output { + fn emit(diag: Vec, _source: SourceFile) -> Self::Output { let mut output = String::new(); for msg in diag { @@ -16,4 +17,4 @@ impl Emitter for BasicEmitter { output } -} \ No newline at end of file +} diff --git a/libkabel/src/diagnostics/emitters/mod.rs b/libkabel/src/diagnostics/emitters/mod.rs index 41c7463..1b18c9c 100644 --- a/libkabel/src/diagnostics/emitters/mod.rs +++ b/libkabel/src/diagnostics/emitters/mod.rs @@ -1,11 +1,11 @@ use crate::diagnostics::Diagnostic; +use crate::source::SourceFile; pub mod basic; #[cfg(feature = "pretty-emitter")] pub mod pretty; -pub(crate) mod util; pub trait Emitter { type Output; - fn emit(diag: Vec, source: String, source_name: String) -> Self::Output; + fn emit(diag: Vec, source: SourceFile) -> Self::Output; } diff --git a/libkabel/src/diagnostics/emitters/pretty.rs b/libkabel/src/diagnostics/emitters/pretty.rs index 0b1bed1..bdec888 100644 --- a/libkabel/src/diagnostics/emitters/pretty.rs +++ b/libkabel/src/diagnostics/emitters/pretty.rs @@ -1,19 +1,18 @@ -use crate::diagnostics::{Diagnostic, DiagnosticType}; use crate::diagnostics::emitters::Emitter; -use std::fmt::Write; +use crate::diagnostics::{Diagnostic, DiagnosticType}; +use crate::source::SourceFile; use colored::{ColoredString, Colorize}; -use crate::diagnostics::emitters::util::{get_line, pos_to_line_col}; +use std::fmt::Write; pub struct PrettyEmitter; impl Emitter for PrettyEmitter { type Output = String; - fn emit(diag: Vec, source: String, source_name: String) -> Self::Output { + fn emit(diag: Vec, source_map: SourceFile) -> Self::Output { let mut output = String::new(); for msg in diag { - match msg.diag_type { DiagnosticType::Error => { write!(output, "{}", "error".bold().red()).unwrap(); @@ -21,7 +20,9 @@ impl Emitter for PrettyEmitter { DiagnosticType::Warning => { write!(output, "{}", "warning".bold().yellow()).unwrap(); } - _ => { continue; } + _ => { + continue; + } } writeln!(output, ": {}", msg.message.bold()).unwrap(); @@ -29,8 +30,8 @@ impl Emitter for PrettyEmitter { let mut biggest_line_no = 0; for span in &msg.spans { - let (s_line, _) = pos_to_line_col(span.span.start, &source); - let (e_line, _) = pos_to_line_col(span.span.start, &source); + let ((s_line, _), (e_line, _)) = source_map.span_position(&span.span); + if s_line > biggest_line_no { biggest_line_no = s_line; } @@ -42,28 +43,49 @@ impl Emitter for PrettyEmitter { let line_no_padding = biggest_line_no.to_string().len(); for labeled_span in &msg.spans { - let (line, start_col) = pos_to_line_col(labeled_span.span.start, &source); - let (_, end_col) = pos_to_line_col(labeled_span.span.end, &source); + let ((line, start_col), (_, end_col)) = + source_map.span_position(&labeled_span.span); - writeln!(output, "{}{} {}:{}:{}", " ".repeat(line_no_padding), "-->".bright_blue().bold(), source_name, line, start_col).unwrap(); + writeln!( + output, + "{}{} {}:{}:{}", + " ".repeat(line_no_padding), + "-->".bright_blue().bold(), + source_map.filename(), + line + 1, + start_col + 1 + ) + .unwrap(); let line_hdr_padding = line.to_string().len() + 1; - writeln!(output, "{}{}", " ".repeat(line_hdr_padding), "|".bright_blue().bold()).unwrap(); - writeln!(output, "{}{} {} {}", line.to_string().bright_blue().bold(), " ".repeat(line_no_padding - line.to_string().len()), "|".bright_blue().bold(), get_line(line-1, &source).unwrap_or("")).unwrap(); - - + writeln!( + output, + "{}{}", + " ".repeat(line_hdr_padding), + "|".bright_blue().bold() + ) + .unwrap(); + writeln!( + output, + "{}{} {} {}", + (line + 1).to_string().bright_blue().bold(), + " ".repeat(line_no_padding - line.to_string().len()), + "|".bright_blue().bold(), + source_map.line_at(line).unwrap_or("") + ) + .unwrap(); let mut end_char = match labeled_span.span_type { DiagnosticType::Error => '^'.to_string().bold().red(), DiagnosticType::Warning => '^'.to_string().bold().yellow(), - DiagnosticType::Help => { + DiagnosticType::Help | DiagnosticType::Hint => { if labeled_span.label.is_some() { '-'.to_string().bold().bright_blue() } else { '~'.to_string().bold().bright_blue() } - }, + } DiagnosticType::SecondaryError => { if labeled_span.label.is_some() { '-'.to_string().bold().red() @@ -77,13 +99,12 @@ impl Emitter for PrettyEmitter { (_, None) => ColoredString::from(""), (DiagnosticType::Error, Some(e)) => e.bold().red(), (DiagnosticType::Warning, Some(e)) => e.bold().yellow(), - (DiagnosticType::Help, Some(e)) => e.bold().bright_blue(), - (DiagnosticType::SecondaryError, Some(e)) => e.bold().red() + (DiagnosticType::Help, Some(e)) | (DiagnosticType::Hint, Some(e)) => { + e.bold().bright_blue() + } + (DiagnosticType::SecondaryError, Some(e)) => e.bold().red(), }; - println!("{} {}", labeled_span.span.start, labeled_span.span.end); - println!("{} {}", end_col, start_col); - let underline_length = if end_col == start_col { end_char = "".into(); 1 @@ -95,14 +116,50 @@ impl Emitter for PrettyEmitter { let underline = match labeled_span.span_type { DiagnosticType::Error => "^".repeat(underline_length).to_string().bold().red(), - DiagnosticType::Warning => "^".repeat(underline_length).to_string().bold().yellow(), - DiagnosticType::Help => "~".repeat(underline_length).to_string().bold().bright_blue(), - DiagnosticType::SecondaryError => "~".repeat(underline_length).to_string().bold().red() + DiagnosticType::Warning => { + "^".repeat(underline_length).to_string().bold().yellow() + } + DiagnosticType::Help | DiagnosticType::Hint => "~" + .repeat(underline_length) + .to_string() + .bold() + .bright_blue(), + DiagnosticType::SecondaryError => { + "~".repeat(underline_length).to_string().bold().red() + } }; - writeln!(output, "{}{} {}{}{} {}", " ".repeat(line_hdr_padding), "|".bright_blue().bold(), " ".repeat(start_col-1), underline, end_char, message).unwrap(); + writeln!( + output, + "{}{}{}{}{} {}", + " ".repeat(line_hdr_padding), + "|".bright_blue().bold(), + " ".repeat(start_col), + underline, + end_char, + message + ) + .unwrap(); writeln!(output).unwrap(); } + + for hint in &msg.hints { + /* + error: + hint: + help: + warning: + + */ + let hdr = match hint.hint_type { + DiagnosticType::Error => "error: ".bold().red(), + DiagnosticType::Warning => "warning: ".bold().yellow(), + DiagnosticType::Help => "help: ".bright_blue().bold(), + DiagnosticType::Hint => "hint: ".bold(), + DiagnosticType::SecondaryError => "error: ".bold().red(), + }; + writeln!(output, "{}{}", hdr, hint.message).unwrap(); + } } output diff --git a/libkabel/src/diagnostics/emitters/util.rs b/libkabel/src/diagnostics/emitters/util.rs deleted file mode 100644 index 060a6e1..0000000 --- a/libkabel/src/diagnostics/emitters/util.rs +++ /dev/null @@ -1,20 +0,0 @@ -pub fn pos_to_line_col(pos: usize, source: &str) -> (usize, usize) { - let mut line = 1; - let mut col = 1; - for (n, c) in source.chars().enumerate() { - if c == '\n' { - line += 1; - col = 1; - } else { - col += 1; - } - if n == pos { - break; - } - } - (line, col) -} - -pub fn get_line(line: usize, source: &str) -> Option<&str> { - source.split('\n').nth(line) -} diff --git a/libkabel/src/diagnostics/macros.rs b/libkabel/src/diagnostics/macros.rs new file mode 100644 index 0000000..e0e114a --- /dev/null +++ b/libkabel/src/diagnostics/macros.rs @@ -0,0 +1,111 @@ +#[macro_export] +macro_rules! diag { + (error,$m:expr,$( $span:expr )*) => { + $crate::diagnostics::Diagnostic { + diag_type: $crate::diagnostics::DiagnosticType::Error, + message: $m.to_string(), + spans: vec![ + $( $span )* + ], + hints: vec![] + } + }; + (warn,$m:expr,$( $span:expr )*) => { + $crate::diagnostics::Diagnostic { + diag_type: $crate::diagnostics::DiagnosticType::Warning, + message: $m.to_string(), + spans: vec![ + $( $span )* + ], + hints: vec![] + } + }; +} + +#[macro_export] +macro_rules! span { + (at: $at:expr) => { + $crate::diagnostics::span::Span::new($at, $at) + }; + (from: $from:expr, to: $to:expr) => { + $crate::diagnostics::span::Span::new($from, $to) + }; +} + +#[macro_export] +macro_rules! assemble_label { + ($ty:expr,$span:expr,$msg:expr) => { + $crate::diagnostics::SpanWithLabel { + span_type: $ty, + span: $span, + label: $msg, + } + }; +} + +#[macro_export] +macro_rules! label_span { + (type: error,$span:expr) => { + $crate::assemble_label!($crate::diagnostics::DiagnosticType::Error, $span, None) + }; + (type: error,$span:expr,$msg:expr) => { + $crate::assemble_label!( + $crate::diagnostics::DiagnosticType::Error, + $span, + Some($msg.to_string()) + ) + }; + + (type: secondary_error,$span:expr) => { + $crate::assemble_label!( + $crate::diagnostics::DiagnosticType::SecondaryError, + $span, + None + ) + }; + (type: secondary_error,$span:expr,$msg:expr) => { + $crate::assemble_label!( + $crate::diagnostics::DiagnosticType::SecondaryError, + $span, + Some($msg.to_string()) + ) + }; + + (type: warning,$span:expr) => { + $crate::assemble_label!($crate::diagnostics::DiagnosticType::Warning, $span, None) + }; + (type: warning,$span:expr,$msg:expr) => { + $crate::assemble_label!( + $crate::diagnostics::DiagnosticType::Warning, + $span, + Some($msg.to_string()) + ) + }; + + (type: help,$span:expr) => { + $crate::assemble_label!($crate::diagnostics::DiagnosticType::Help, $span, None) + }; + (type: help,$span:expr,$msg:expr) => { + $crate::assemble_label!( + $crate::diagnostics::DiagnosticType::Help, + $span, + Some($msg.to_string()) + ) + }; +} + +#[macro_export] +macro_rules! hint { + (help: $msg:expr) => { + $crate::diagnostics::DiagnosticHint { + hint_type: $crate::diagnostics::DiagnosticType::Help, + message: $msg.to_string(), + } + }; + (hint: $msg:expr) => { + $crate::diagnostics::DiagnosticHint { + hint_type: $crate::diagnostics::DiagnosticType::Hint, + message: $msg.to_string(), + } + }; +} diff --git a/libkabel/src/diagnostics/mod.rs b/libkabel/src/diagnostics/mod.rs index 23bf121..d3e1755 100644 --- a/libkabel/src/diagnostics/mod.rs +++ b/libkabel/src/diagnostics/mod.rs @@ -1,13 +1,16 @@ use crate::diagnostics::span::Span; -pub mod span; pub mod emitters; +pub mod span; +#[macro_use] +pub mod macros; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Diagnostic { pub diag_type: DiagnosticType, pub spans: Vec, pub message: String, + pub hints: Vec, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -15,7 +18,8 @@ pub enum DiagnosticType { Error, Warning, Help, - SecondaryError + Hint, + SecondaryError, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -23,4 +27,22 @@ pub struct SpanWithLabel { pub span: Span, pub span_type: DiagnosticType, pub label: Option, -} \ No newline at end of file +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DiagnosticHint { + pub hint_type: DiagnosticType, + pub message: String, +} + +impl Diagnostic { + pub fn with_hint(mut self, hint: DiagnosticHint) -> Self { + self.hints.push(hint); + Self { + diag_type: self.diag_type, + spans: self.spans, + message: self.message, + hints: self.hints, + } + } +} diff --git a/libkabel/src/diagnostics/span.rs b/libkabel/src/diagnostics/span.rs index 6160636..f44ae4a 100644 --- a/libkabel/src/diagnostics/span.rs +++ b/libkabel/src/diagnostics/span.rs @@ -1,11 +1,11 @@ #[derive(Debug, Clone, PartialEq, Eq, Copy)] pub struct Span { pub start: usize, - pub end: usize + pub end: usize, } impl Span { pub fn new(start: usize, end: usize) -> Self { Self { start, end } } -} \ No newline at end of file +} diff --git a/libkabel/src/error.rs b/libkabel/src/error.rs index 0bab296..9a3f2f9 100644 --- a/libkabel/src/error.rs +++ b/libkabel/src/error.rs @@ -1,10 +1,11 @@ use crate::diagnostics::Diagnostic; +use crate::source::SourceFile; #[derive(Debug, Clone, PartialEq, Eq)] pub enum KError { InternalError(InternalError), - UserError(Vec) + UserError(Vec, SourceFile), } #[derive(Debug, Clone, PartialEq, Eq)] -pub enum InternalError {} \ No newline at end of file +pub enum InternalError {} diff --git a/libkabel/src/formatter.rs b/libkabel/src/formatter.rs new file mode 100644 index 0000000..d352a2b --- /dev/null +++ b/libkabel/src/formatter.rs @@ -0,0 +1,11 @@ +use crate::lexer::token::Token; +use std::fmt::Write; +use tracing::debug; + +pub fn print_tts(tokens: &[Token]) { + let mut out = String::new(); + for token in tokens { + write!(out, "{:?} ", token.tt).unwrap(); + } + debug!("{out}"); +} diff --git a/libkabel/src/lexer/mod.rs b/libkabel/src/lexer/mod.rs index 7c5cbc0..b944bb6 100644 --- a/libkabel/src/lexer/mod.rs +++ b/libkabel/src/lexer/mod.rs @@ -1,225 +1,152 @@ -use token::{ArithOperator, Bracket, Literal, Statement, Token}; -use tracing::debug; -use crate::diagnostics::{Diagnostic, DiagnosticType, SpanWithLabel}; -use crate::diagnostics::span::Span; use crate::error::KError; +use crate::lexer::token::{Token, TokenType}; +use crate::source::SourceFile; +use crate::{diag, hint, label_span, span}; +#[macro_use] pub mod token; -#[derive(Debug)] -enum State { - Stringing, - Commenting, - Numbering, - BuildingToken, -} +pub fn lexer(mut source: SourceFile) -> Result, KError> { + let mut tokens = vec![]; + let mut errors = vec![]; -pub fn lexer(text_source: &str) -> Result, KError> { - debug!("lexing!"); + 'main: while let Some(c) = source.next() { + match c { + '(' => tokens.push(token!(at: source.pos(), TokenType::LeftParenthesis)), + ')' => tokens.push(token!(at: source.pos(), TokenType::RightParenthesis)), - let mut current_token = String::new(); - let mut lexed = Vec::new(); - let mut state: State = State::BuildingToken; + '-' => tokens.push(token!(at: source.pos(), TokenType::Minus)), + '+' => tokens.push(token!(at: source.pos(), TokenType::Plus)), + '*' => tokens.push(token!(at: source.pos(), TokenType::Star)), - let mut chars = text_source.chars().peekable(); + ',' => tokens.push(token!(at: source.pos(), TokenType::Comma)), - let mut pos: usize = 0; - let mut span_start: usize = 0; + ';' => tokens.push(token!(at: source.pos(), TokenType::Semicolon)), + ':' => tokens.push(token!(at: source.pos(), TokenType::Colon)), + '=' => tokens.push(token!(at: source.pos(), TokenType::Equals)), - while let Some(c) = chars.next() { - pos += 1; + n1 if n1.is_ascii_digit() => { + source.start_token(); + let mut num_lit = String::from(n1); - match state { - State::Commenting => { - // Stop commenting at end of line - if c == '\n' { - state = State::BuildingToken; - } - } - State::Stringing => { - // If next char is an unescaped quote - // TODO: when possible, make this 1 `if'. Ability to - // do that remains unimplemented, hence the stupid copied - // code below. - if c != '\n' { - if let Some(c_peek) = chars.peek() { - if c != '\\' && *c_peek == '\"' { - chars.next(); - pos += 1; - - current_token.push(c); - let tok_cpy = current_token.clone(); - lexed.push(Token::Literal(Span::new(span_start, pos), Literal::Str(tok_cpy))); - - state = State::BuildingToken; - current_token = String::new(); - } else { - current_token.push(c); - } + while let Some(c) = source.peek() { + if c.is_ascii_digit() { + num_lit.push(source.next().expect("unreachable")) } else { - return Err(KError::UserError(vec![ - Diagnostic { - diag_type: DiagnosticType::Error, - message: "unterminated string literal".to_string(), - spans: vec![ - SpanWithLabel { - span: Span::new(span_start-1, span_start-1), - span_type: DiagnosticType::SecondaryError, - label: Some("string began here".to_string()) - }, - SpanWithLabel { - span: Span::new(pos, pos), - span_type: DiagnosticType::Error, - label: Some("expected end quote here".to_string()) - } - ] - } - ])); + break; } - } else { - return Err(KError::UserError(vec![ - Diagnostic { - diag_type: DiagnosticType::Error, - message: "unterminated string literal".to_string(), - spans: vec![ - SpanWithLabel { - span: Span::new(span_start-1, span_start-1), - span_type: DiagnosticType::SecondaryError, - label: Some("string began here".to_string()) - }, - SpanWithLabel { - span: Span::new(pos, pos), - span_type: DiagnosticType::Error, - label: Some("expected end quote here".to_string()) + } + + if source.peek() == Some('.') { + if let Some(two) = source.peek_two() { + if two.is_ascii_digit() { + num_lit.push(source.next().expect("unreachable")); + while let Some(c) = source.peek() { + if c.is_ascii_digit() { + num_lit.push(source.next().expect("unreachable")) + } else { + break; } - ] - } - ])); - } - } - State::Numbering => { - current_token.push(c); - // If next char isn't numeric, is at end of this number literal - if let Some(c_peek) = chars.peek() { - if !c_peek.is_ascii_digit() { - let num = match current_token.parse::() { - Ok(n) => n, - Err(_) => { - debug!("{} {}", span_start, pos); - return Err(KError::UserError(vec![ - Diagnostic { - message: "invalid numeric literal".to_string(), - diag_type: DiagnosticType::Error, - spans: vec![ - SpanWithLabel { - span: Span::new(span_start, pos), - span_type: DiagnosticType::Error, - label: Some("this is not a valid numeric literal".to_string()), - } - ], - } - ])); } - }; + } + } + } - lexed.push(Token::Literal( - Span::new(span_start, pos), - Literal::Num(num) - )); - state = State::BuildingToken; - current_token = String::new(); + // try to parse the lit + let val: f64 = match num_lit.parse() { + Ok(val) => val, + Err(_) => { + errors.push(diag!(error, "Invalid numeric literal", label_span!(type: error, source.end_token(), "this is not a valid numeric literal"))); + break 'main; + } + }; + + tokens.push(token!(span: source.end_token(), TokenType::NumericLiteral(val))); + } + + c1 if c1.is_ascii_alphabetic() || c1 == '_' => { + source.start_token(); + let mut ident = String::from(c1); + + while let Some(c) = source.peek() { + if c.is_ascii_alphanumeric() || c == '_' { + ident.push(source.next().expect("unreachable")); + } else { + break; + } + } + + let span = source.end_token(); + + tokens.push(match ident.as_str() { + "to" => token!(span: span, TokenType::To), + "with" => token!(span: span, TokenType::With), + "for" => token!(span: span, TokenType::For), + "in" => token!(span: span, TokenType::In), + _ => token!(span: span, TokenType::Identifier(ident)), + }) + } + + '\"' => { + source.start_token(); + let mut string_lit = String::new(); + + while let Some(c) = source.peek() { + if c != '\"' { + string_lit.push(source.next().expect("unreachable")); + } else { + source.next(); // consume the " + break; + } + } + + let span = source.end_token(); + + tokens.push(token!(span: span, TokenType::StringLiteral(string_lit))); + } + + '#' => { + // read until end of while + while let Some(c2) = source.next() { + if c2 == '\n' { + break; } } } - State::BuildingToken => { - if c == '\"' { - state = State::Stringing; - current_token = String::new(); - // We don't need to push c because it's the quote delimiter, - // which has already served its purpose as an indicator - span_start = pos; - continue; - } else if c.is_ascii_digit() { - state = State::Numbering; - current_token = c.to_string(); - span_start = pos; - continue; + w if w.is_whitespace() => {} + unknown => { + let mut err = diag!( + error, + format!("Unexpected character `{unknown}`"), + label_span!(type: error, span!(at: source.pos()), "here") + ); + + if let Some(tkn) = tokens.last() { + match tkn.tt { + TokenType::NumericLiteral(_) => { + err = err + .with_hint(hint!(help: "last token detected was a numeric literal")) + .with_hint( + hint!(help: "this is most likely an invalid numeric literal"), + ); + if unknown == '.' { + err = err.with_hint(hint!(hint: "unexpected character was a `.`, did you forget to add a 0 at the end (e.g. `5.` instead of `5.0`)?")); + } + } + _ => {} + } } - // Known meaningful tokens - - if current_token.is_empty() { - span_start = pos; - } - - current_token.push(c); - - match current_token.as_str() { - "\n" => { - continue; - } - "#" => { - state = State::Commenting; - current_token = String::new(); - } - ";" => { - lexed.push(Token::Terminator(Span::new(span_start, pos))); - current_token = String::new(); - } - "if " => { - lexed.push(Token::Statement(Span::new(span_start, pos), Statement::Conditional)); - current_token = String::new(); - } - "to " => { - lexed.push(Token::Statement(Span::new(span_start, pos), Statement::FunctionDef)); - current_token = String::new(); - } - "for " => { - lexed.push(Token::Statement(Span::new(span_start, pos), Statement::ForLoop)); - current_token = String::new(); - } - "while " => { - lexed.push(Token::Statement(Span::new(span_start, pos), Statement::WhileLoop)); - current_token = String::new(); - } - "(" => { - lexed.push(Token::Bracket(Span::new(span_start, pos), Bracket::Open)); - current_token = String::new(); - } - ")" => { - lexed.push(Token::Bracket(Span::new(span_start, pos), Bracket::Close)); - current_token = String::new(); - } - "*" => { - lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Multiply)); - current_token = String::new(); - } - "/" => { - lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Divide)); - current_token = String::new(); - } - "+" => { - lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Add)); - current_token = String::new(); - } - "-" => { - lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Subtract)); - current_token = String::new(); - } - "^" => { - lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Exponentiate)); - current_token = String::new(); - } - "%" => { - lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Reduce)); - current_token = String::new(); - } - _ => {} - } + errors.push(err); + // fatal error - exit loop + break; } } - debug!("{} {:?} {} {:?}", &c, &state, ¤t_token, &lexed); } - Ok(lexed) + if !errors.is_empty() { + Err(KError::UserError(errors, source)) + } else { + Ok(tokens) + } } diff --git a/libkabel/src/lexer/token.rs b/libkabel/src/lexer/token.rs index 1be67ce..ef1232b 100644 --- a/libkabel/src/lexer/token.rs +++ b/libkabel/src/lexer/token.rs @@ -1,51 +1,82 @@ use crate::diagnostics::span::Span; -use crate::variables; +use std::fmt::{Display, Formatter}; -// parts of Token - -#[derive(Debug)] -pub enum ArithOperator { - Add, - Subtract, - Multiply, - Divide, - Exponentiate, - Reduce, +#[derive(Debug, PartialEq, Clone)] +pub struct Token { + pub tt: TokenType, + pub span: Span, } -#[derive(Debug)] -pub enum BooleanOperator { - And, - Or, +#[derive(Debug, PartialEq, Clone)] +pub enum TokenType { + LeftParenthesis, // ( + RightParenthesis, // ) + Comma, // , + + Minus, // - + Plus, // + + Star, // * + + Semicolon, + Colon, + Equals, + + To, + With, + For, + In, + + Identifier(String), + NumericLiteral(f64), + StringLiteral(String), } -#[derive(Debug)] -pub enum Literal { - Str(String), - Num(f64), +impl Display for Token { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.tt) + } } -#[derive(Debug)] -pub enum Statement { - Conditional, - ForLoop, - WhileLoop, - FunctionDef, +impl Display for TokenType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + TokenType::LeftParenthesis => write!(f, "("), + TokenType::RightParenthesis => write!(f, ")"), + TokenType::Comma => write!(f, ","), + TokenType::Minus => write!(f, "-"), + TokenType::Plus => write!(f, "+"), + TokenType::Star => write!(f, "*"), + TokenType::Semicolon => write!(f, ";\n"), + TokenType::To => write!(f, "to"), + TokenType::Identifier(i) => write!(f, "{i}"), + TokenType::Colon => write!(f, ":"), + TokenType::Equals => write!(f, "="), + TokenType::NumericLiteral(val) => write!(f, "{val}"), + TokenType::StringLiteral(val) => write!(f, "\"{val}\""), + TokenType::With => write!(f, "with"), + TokenType::For => write!(f, "for"), + TokenType::In => write!(f, "in"), + } + } } -#[derive(Debug)] -pub enum Bracket { - Open, - Close, -} - -#[derive(Debug)] -pub enum Token { - Literal(Span, Literal), - ArithOperator(Span, ArithOperator), - Statement(Span, Statement), - Bracket(Span, Bracket), - Variable(Span, variables::Variable), - Terminator(Span), - +macro_rules! token { + (start: $start:expr, end: $end:expr, $tt:expr) => { + $crate::lexer::token::Token { + tt: $tt, + span: $crate::diagnostics::span::Span::new($start, $end), + } + }; + (at: $at:expr, $tt:expr) => { + $crate::lexer::token::Token { + tt: $tt, + span: $crate::diagnostics::span::Span::new($at, $at), + } + }; + (span: $span:expr, $tt:expr) => { + $crate::lexer::token::Token { + tt: $tt, + span: $span, + } + }; } diff --git a/libkabel/src/lib.rs b/libkabel/src/lib.rs index 0889a89..2bb6eba 100644 --- a/libkabel/src/lib.rs +++ b/libkabel/src/lib.rs @@ -1,5 +1,7 @@ -pub mod modules; -pub mod variables; pub mod diagnostics; +pub mod error; +pub mod formatter; pub mod lexer; -pub mod error; \ No newline at end of file +pub mod modules; +pub mod source; +pub mod variables; diff --git a/libkabel/src/source.rs b/libkabel/src/source.rs new file mode 100644 index 0000000..6b43b5a --- /dev/null +++ b/libkabel/src/source.rs @@ -0,0 +1,76 @@ +use crate::diagnostics::span::Span; + +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct SourceFile { + inner_original: String, + pos: usize, + current_token_start_pos: Option, + name: String, +} +impl SourceFile { + pub fn new(source: String, name: String) -> Self { + SourceFile { + inner_original: source, + pos: 0, + current_token_start_pos: None, + name, + } + } + + pub fn next(&mut self) -> Option { + self.pos += 1; + self.inner_original.chars().nth(self.pos - 1) + } + + pub fn peek(&self) -> Option { + self.inner_original.chars().nth(self.pos) + } + pub fn peek_two(&self) -> Option { + self.inner_original.chars().nth(self.pos + 1) + } + + pub fn pos(&self) -> usize { + self.pos + } + + pub fn start_token(&mut self) { + self.current_token_start_pos = Some(self.pos) + } + pub fn end_token(&mut self) -> Span { + if let Some(start) = self.current_token_start_pos { + self.current_token_start_pos = None; + Span::new(start, self.pos) + } else { + panic!("tried to end a token when none was started"); + } + } + + pub fn position_of(&self, pos: &usize) -> (usize, usize) { + let mut line = 0; + let mut col = 0; + for (n, c) in self.inner_original.chars().enumerate() { + if c == '\n' { + line += 1; + col = 0; + } else { + col += 1; + } + if n == *pos { + break; + } + } + (line, col) + } + + pub fn span_position(&self, span: &Span) -> ((usize, usize), (usize, usize)) { + (self.position_of(&span.start), self.position_of(&span.end)) + } + + pub fn line_at(&self, line: usize) -> Option<&str> { + self.inner_original.split('\n').nth(line) + } + + pub fn filename(&self) -> &str { + &self.name + } +}