lexer v2
This commit is contained in:
parent
e1d004d986
commit
7fdc97b619
15 changed files with 558 additions and 320 deletions
|
@ -2,8 +2,10 @@
|
|||
<module type="EMPTY_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/kabel-rs/src" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/kabel/src" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/libkabel/src" isTestSource="false" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/kabel-rs/target" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read};
|
||||
use colored::Colorize;
|
||||
use libkabel::diagnostics::emitters::Emitter;
|
||||
use libkabel::error::KError;
|
||||
use libkabel::formatter::print_tts;
|
||||
use libkabel::lexer::token::Token;
|
||||
use colored::Colorize;
|
||||
use libkabel::source::SourceFile;
|
||||
use std::error::Error;
|
||||
use std::io;
|
||||
use std::{env, fs};
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
let argv: Vec<String> = env::args().collect();
|
||||
|
@ -14,13 +15,13 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||
std::process::exit(1);
|
||||
}
|
||||
|
||||
let mut source_fd = match File::open(&argv[1]) {
|
||||
let text_source = match fs::read_to_string(&argv[1]) {
|
||||
Err(e) if e.kind() == io::ErrorKind::NotFound => {
|
||||
std::process::exit(1);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"error: Tried opening file `{}' and got unexpected error: {}",
|
||||
"error: Tried reading file `{}' and got unexpected error: {}",
|
||||
argv[1],
|
||||
e.kind()
|
||||
);
|
||||
|
@ -31,26 +32,42 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||
|
||||
tracing_subscriber::fmt::init();
|
||||
|
||||
let mut text_source = String::new();
|
||||
source_fd.read_to_string(&mut text_source)?;
|
||||
let source = SourceFile::new(text_source, argv[1].clone());
|
||||
|
||||
// Lex!
|
||||
let lexed: Vec<Token> = match libkabel::lexer::lexer(&text_source) {
|
||||
let lexed: Vec<Token> = match libkabel::lexer::lexer(source) {
|
||||
Err(e) => {
|
||||
match e {
|
||||
KError::InternalError(e) => {
|
||||
eprintln!("{} {}",
|
||||
"error: Internal Kabel error!".red(),
|
||||
"THIS IS NOT YOUR FAULT.".bold().red());
|
||||
eprintln!("{}", "error: Please report this error to the kabel developers along with your".red());
|
||||
eprintln!(
|
||||
"{} {}",
|
||||
"error: Internal Kabel error!".red(),
|
||||
"THIS IS NOT YOUR FAULT.".bold().red()
|
||||
);
|
||||
eprintln!(
|
||||
"{}",
|
||||
"error: Please report this error to the kabel developers along with your"
|
||||
.red()
|
||||
);
|
||||
eprintln!("{}", "fail: program's complete source code. Either");
|
||||
eprintln!("{}", "fail: - Open an Issue at https://git.e3t.cc/tm85/kabel, or".red());
|
||||
eprintln!("{}", "fail: - E-Mail the developers at kabel@e3t.cc".red());
|
||||
eprintln!(
|
||||
"{}",
|
||||
"fail: - Open an Issue at https://git.e3t.cc/tm85/kabel, or".red()
|
||||
);
|
||||
eprintln!(
|
||||
"{}",
|
||||
"fail: - E-Mail the developers at kabel@e3t.cc".red()
|
||||
);
|
||||
eprintln!("{} {:?}", "fail: Error message follows:".red(), e);
|
||||
}
|
||||
KError::UserError(diags) => {
|
||||
KError::UserError(diags, src_map) => {
|
||||
//eprintln!("{}", libkabel::diagnostics::emitters::basic::BasicEmitter::emit(diags, text_source));
|
||||
eprintln!("{}", libkabel::diagnostics::emitters::pretty::PrettyEmitter::emit(diags, text_source, argv[1].clone()));
|
||||
eprintln!(
|
||||
"{}",
|
||||
libkabel::diagnostics::emitters::pretty::PrettyEmitter::emit(
|
||||
diags, src_map
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
std::process::exit(1);
|
||||
|
@ -58,7 +75,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||
Ok(lexed) => lexed,
|
||||
};
|
||||
|
||||
println!("{:#?}", lexed);
|
||||
print_tts(&lexed);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use crate::diagnostics::Diagnostic;
|
||||
use crate::diagnostics::emitters::Emitter;
|
||||
use crate::diagnostics::Diagnostic;
|
||||
use crate::source::SourceFile;
|
||||
use std::fmt::Write;
|
||||
|
||||
pub struct BasicEmitter;
|
||||
|
@ -7,7 +8,7 @@ pub struct BasicEmitter;
|
|||
impl Emitter for BasicEmitter {
|
||||
type Output = String;
|
||||
|
||||
fn emit(diag: Vec<Diagnostic>, _source: String, _source_name: String) -> Self::Output {
|
||||
fn emit(diag: Vec<Diagnostic>, _source: SourceFile) -> Self::Output {
|
||||
let mut output = String::new();
|
||||
|
||||
for msg in diag {
|
||||
|
@ -16,4 +17,4 @@ impl Emitter for BasicEmitter {
|
|||
|
||||
output
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
use crate::diagnostics::Diagnostic;
|
||||
use crate::source::SourceFile;
|
||||
|
||||
pub mod basic;
|
||||
#[cfg(feature = "pretty-emitter")]
|
||||
pub mod pretty;
|
||||
pub(crate) mod util;
|
||||
|
||||
pub trait Emitter {
|
||||
type Output;
|
||||
fn emit(diag: Vec<Diagnostic>, source: String, source_name: String) -> Self::Output;
|
||||
fn emit(diag: Vec<Diagnostic>, source: SourceFile) -> Self::Output;
|
||||
}
|
||||
|
|
|
@ -1,19 +1,18 @@
|
|||
use crate::diagnostics::{Diagnostic, DiagnosticType};
|
||||
use crate::diagnostics::emitters::Emitter;
|
||||
use std::fmt::Write;
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticType};
|
||||
use crate::source::SourceFile;
|
||||
use colored::{ColoredString, Colorize};
|
||||
use crate::diagnostics::emitters::util::{get_line, pos_to_line_col};
|
||||
use std::fmt::Write;
|
||||
|
||||
pub struct PrettyEmitter;
|
||||
|
||||
impl Emitter for PrettyEmitter {
|
||||
type Output = String;
|
||||
|
||||
fn emit(diag: Vec<Diagnostic>, source: String, source_name: String) -> Self::Output {
|
||||
fn emit(diag: Vec<Diagnostic>, source_map: SourceFile) -> Self::Output {
|
||||
let mut output = String::new();
|
||||
|
||||
for msg in diag {
|
||||
|
||||
match msg.diag_type {
|
||||
DiagnosticType::Error => {
|
||||
write!(output, "{}", "error".bold().red()).unwrap();
|
||||
|
@ -21,7 +20,9 @@ impl Emitter for PrettyEmitter {
|
|||
DiagnosticType::Warning => {
|
||||
write!(output, "{}", "warning".bold().yellow()).unwrap();
|
||||
}
|
||||
_ => { continue; }
|
||||
_ => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(output, ": {}", msg.message.bold()).unwrap();
|
||||
|
@ -29,8 +30,8 @@ impl Emitter for PrettyEmitter {
|
|||
let mut biggest_line_no = 0;
|
||||
|
||||
for span in &msg.spans {
|
||||
let (s_line, _) = pos_to_line_col(span.span.start, &source);
|
||||
let (e_line, _) = pos_to_line_col(span.span.start, &source);
|
||||
let ((s_line, _), (e_line, _)) = source_map.span_position(&span.span);
|
||||
|
||||
if s_line > biggest_line_no {
|
||||
biggest_line_no = s_line;
|
||||
}
|
||||
|
@ -42,28 +43,49 @@ impl Emitter for PrettyEmitter {
|
|||
let line_no_padding = biggest_line_no.to_string().len();
|
||||
|
||||
for labeled_span in &msg.spans {
|
||||
let (line, start_col) = pos_to_line_col(labeled_span.span.start, &source);
|
||||
let (_, end_col) = pos_to_line_col(labeled_span.span.end, &source);
|
||||
let ((line, start_col), (_, end_col)) =
|
||||
source_map.span_position(&labeled_span.span);
|
||||
|
||||
writeln!(output, "{}{} {}:{}:{}", " ".repeat(line_no_padding), "-->".bright_blue().bold(), source_name, line, start_col).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
"{}{} {}:{}:{}",
|
||||
" ".repeat(line_no_padding),
|
||||
"-->".bright_blue().bold(),
|
||||
source_map.filename(),
|
||||
line + 1,
|
||||
start_col + 1
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let line_hdr_padding = line.to_string().len() + 1;
|
||||
|
||||
writeln!(output, "{}{}", " ".repeat(line_hdr_padding), "|".bright_blue().bold()).unwrap();
|
||||
writeln!(output, "{}{} {} {}", line.to_string().bright_blue().bold(), " ".repeat(line_no_padding - line.to_string().len()), "|".bright_blue().bold(), get_line(line-1, &source).unwrap_or("<line unavailable>")).unwrap();
|
||||
|
||||
|
||||
writeln!(
|
||||
output,
|
||||
"{}{}",
|
||||
" ".repeat(line_hdr_padding),
|
||||
"|".bright_blue().bold()
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
"{}{} {} {}",
|
||||
(line + 1).to_string().bright_blue().bold(),
|
||||
" ".repeat(line_no_padding - line.to_string().len()),
|
||||
"|".bright_blue().bold(),
|
||||
source_map.line_at(line).unwrap_or("<line unavailable>")
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mut end_char = match labeled_span.span_type {
|
||||
DiagnosticType::Error => '^'.to_string().bold().red(),
|
||||
DiagnosticType::Warning => '^'.to_string().bold().yellow(),
|
||||
DiagnosticType::Help => {
|
||||
DiagnosticType::Help | DiagnosticType::Hint => {
|
||||
if labeled_span.label.is_some() {
|
||||
'-'.to_string().bold().bright_blue()
|
||||
} else {
|
||||
'~'.to_string().bold().bright_blue()
|
||||
}
|
||||
},
|
||||
}
|
||||
DiagnosticType::SecondaryError => {
|
||||
if labeled_span.label.is_some() {
|
||||
'-'.to_string().bold().red()
|
||||
|
@ -77,13 +99,12 @@ impl Emitter for PrettyEmitter {
|
|||
(_, None) => ColoredString::from(""),
|
||||
(DiagnosticType::Error, Some(e)) => e.bold().red(),
|
||||
(DiagnosticType::Warning, Some(e)) => e.bold().yellow(),
|
||||
(DiagnosticType::Help, Some(e)) => e.bold().bright_blue(),
|
||||
(DiagnosticType::SecondaryError, Some(e)) => e.bold().red()
|
||||
(DiagnosticType::Help, Some(e)) | (DiagnosticType::Hint, Some(e)) => {
|
||||
e.bold().bright_blue()
|
||||
}
|
||||
(DiagnosticType::SecondaryError, Some(e)) => e.bold().red(),
|
||||
};
|
||||
|
||||
println!("{} {}", labeled_span.span.start, labeled_span.span.end);
|
||||
println!("{} {}", end_col, start_col);
|
||||
|
||||
let underline_length = if end_col == start_col {
|
||||
end_char = "".into();
|
||||
1
|
||||
|
@ -95,14 +116,50 @@ impl Emitter for PrettyEmitter {
|
|||
|
||||
let underline = match labeled_span.span_type {
|
||||
DiagnosticType::Error => "^".repeat(underline_length).to_string().bold().red(),
|
||||
DiagnosticType::Warning => "^".repeat(underline_length).to_string().bold().yellow(),
|
||||
DiagnosticType::Help => "~".repeat(underline_length).to_string().bold().bright_blue(),
|
||||
DiagnosticType::SecondaryError => "~".repeat(underline_length).to_string().bold().red()
|
||||
DiagnosticType::Warning => {
|
||||
"^".repeat(underline_length).to_string().bold().yellow()
|
||||
}
|
||||
DiagnosticType::Help | DiagnosticType::Hint => "~"
|
||||
.repeat(underline_length)
|
||||
.to_string()
|
||||
.bold()
|
||||
.bright_blue(),
|
||||
DiagnosticType::SecondaryError => {
|
||||
"~".repeat(underline_length).to_string().bold().red()
|
||||
}
|
||||
};
|
||||
|
||||
writeln!(output, "{}{} {}{}{} {}", " ".repeat(line_hdr_padding), "|".bright_blue().bold(), " ".repeat(start_col-1), underline, end_char, message).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
"{}{}{}{}{} {}",
|
||||
" ".repeat(line_hdr_padding),
|
||||
"|".bright_blue().bold(),
|
||||
" ".repeat(start_col),
|
||||
underline,
|
||||
end_char,
|
||||
message
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(output).unwrap();
|
||||
}
|
||||
|
||||
for hint in &msg.hints {
|
||||
/*
|
||||
error:
|
||||
hint:
|
||||
help:
|
||||
warning:
|
||||
|
||||
*/
|
||||
let hdr = match hint.hint_type {
|
||||
DiagnosticType::Error => "error: ".bold().red(),
|
||||
DiagnosticType::Warning => "warning: ".bold().yellow(),
|
||||
DiagnosticType::Help => "help: ".bright_blue().bold(),
|
||||
DiagnosticType::Hint => "hint: ".bold(),
|
||||
DiagnosticType::SecondaryError => "error: ".bold().red(),
|
||||
};
|
||||
writeln!(output, "{}{}", hdr, hint.message).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
/// Translates a character index into a `(line, column)` pair.
///
/// Both counters start at 1. Every character from index 0 up to and
/// including index `pos` is visited: a `'\n'` moves to the next line and
/// resets the column to 1, any other character advances the column by one.
/// If `pos` lies beyond the end of `source`, the whole text is visited.
pub fn pos_to_line_col(pos: usize, source: &str) -> (usize, usize) {
    source
        .chars()
        // Indices 0..=pos, i.e. pos + 1 characters (all of them if pos is huge).
        .take(pos.saturating_add(1))
        .fold((1, 1), |(row, column), ch| match ch {
            '\n' => (row + 1, 1),
            _ => (row, column + 1),
        })
}
|
||||
|
||||
/// Returns the `line`-th (zero-based) piece of `source` when split on `'\n'`,
/// or `None` if there are not that many pieces.
///
/// Only `'\n'` is treated as a separator, so a preceding `'\r'` stays part of
/// the returned slice.
pub fn get_line(line: usize, source: &str) -> Option<&str> {
    let mut rows = source.split('\n');
    rows.nth(line)
}
|
111
libkabel/src/diagnostics/macros.rs
Normal file
111
libkabel/src/diagnostics/macros.rs
Normal file
|
@ -0,0 +1,111 @@
|
|||
/// Builds a `Diagnostic` of the given severity with no hints attached.
///
/// Syntax: `diag!(error, message, span, span, ...)` or
/// `diag!(warn, message, span, span, ...)`.
///
/// * `message` is any expression with a `to_string()` method.
/// * Each `span` is an expression producing a labelled span (typically built
///   with `label_span!`). Spans are comma-separated; a trailing comma is
///   accepted, and the list may be empty (`diag!(error, "msg",)`).
///
/// Earlier revisions matched the spans without a separator, which made any
/// invocation with two or more spans expand to an invalid `vec![a b]`.
/// Single-span and zero-span invocations are accepted exactly as before.
#[macro_export]
macro_rules! diag {
    (error, $m:expr, $( $span:expr ),* $(,)?) => {
        $crate::diagnostics::Diagnostic {
            diag_type: $crate::diagnostics::DiagnosticType::Error,
            message: $m.to_string(),
            spans: vec![ $( $span ),* ],
            hints: vec![],
        }
    };
    (warn, $m:expr, $( $span:expr ),* $(,)?) => {
        $crate::diagnostics::Diagnostic {
            diag_type: $crate::diagnostics::DiagnosticType::Warning,
            message: $m.to_string(),
            spans: vec![ $( $span ),* ],
            hints: vec![],
        }
    };
}
|
||||
|
||||
/// Shorthand for constructing a `Span`.
///
/// * `span!(from: a, to: b)` expands to `Span::new(a, b)`.
/// * `span!(at: p)` expands to `Span::new(p, p)`; note that the expression
///   `p` is written out twice and is therefore evaluated twice.
#[macro_export]
macro_rules! span {
    (from: $start:expr, to: $end:expr) => {
        $crate::diagnostics::span::Span::new($start, $end)
    };
    (at: $point:expr) => {
        $crate::span!(from: $point, to: $point)
    };
}
|
||||
|
||||
/// Assembles a `SpanWithLabel` from its three parts, in this order:
/// the span's `DiagnosticType`, the span itself, and the label
/// (an `Option<String>`-typed expression, used as-is).
///
/// This is the low-level constructor used by `label_span!`.
#[macro_export]
macro_rules! assemble_label {
    ($kind:expr, $range:expr, $label:expr) => {
        $crate::diagnostics::SpanWithLabel {
            span_type: $kind,
            span: $range,
            label: $label,
        }
    };
}
|
||||
|
||||
/// Builds a `SpanWithLabel` of a given kind, with or without a label.
///
/// Syntax: `label_span!(type: KIND, span)` or
/// `label_span!(type: KIND, span, message)` where `KIND` is one of
/// `error`, `secondary_error`, `warning`, `help` or `hint`.
///
/// * Without `message` the label is `None`.
/// * With `message` the label is `Some(message.to_string())`.
///
/// `hint` maps to `DiagnosticType::Hint`, the one variant that previously
/// could not be produced through this macro. All other forms expand exactly
/// as before.
#[macro_export]
macro_rules! label_span {
    // Internal rules: attach the (optional) label once the kind is resolved.
    (@build $ty:expr, $span:expr) => {
        $crate::assemble_label!($ty, $span, None)
    };
    (@build $ty:expr, $span:expr, $msg:expr) => {
        $crate::assemble_label!($ty, $span, Some($msg.to_string()))
    };

    // Public rules: translate the kind keyword, forward everything else.
    (type: error, $($rest:tt)+) => {
        $crate::label_span!(@build $crate::diagnostics::DiagnosticType::Error, $($rest)+)
    };
    (type: secondary_error, $($rest:tt)+) => {
        $crate::label_span!(
            @build $crate::diagnostics::DiagnosticType::SecondaryError,
            $($rest)+
        )
    };
    (type: warning, $($rest:tt)+) => {
        $crate::label_span!(@build $crate::diagnostics::DiagnosticType::Warning, $($rest)+)
    };
    (type: help, $($rest:tt)+) => {
        $crate::label_span!(@build $crate::diagnostics::DiagnosticType::Help, $($rest)+)
    };
    (type: hint, $($rest:tt)+) => {
        $crate::label_span!(@build $crate::diagnostics::DiagnosticType::Hint, $($rest)+)
    };
}
|
||||
|
||||
/// Builds a `DiagnosticHint` whose message is `text.to_string()`.
///
/// * `hint!(hint: text)` produces a hint of type `DiagnosticType::Hint`.
/// * `hint!(help: text)` produces a hint of type `DiagnosticType::Help`.
#[macro_export]
macro_rules! hint {
    (hint: $text:expr) => {
        $crate::diagnostics::DiagnosticHint {
            hint_type: $crate::diagnostics::DiagnosticType::Hint,
            message: $text.to_string(),
        }
    };
    (help: $text:expr) => {
        $crate::diagnostics::DiagnosticHint {
            hint_type: $crate::diagnostics::DiagnosticType::Help,
            message: $text.to_string(),
        }
    };
}
|
|
@ -1,13 +1,16 @@
|
|||
use crate::diagnostics::span::Span;
|
||||
|
||||
pub mod span;
|
||||
pub mod emitters;
|
||||
pub mod span;
|
||||
#[macro_use]
|
||||
pub mod macros;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Diagnostic {
|
||||
pub diag_type: DiagnosticType,
|
||||
pub spans: Vec<SpanWithLabel>,
|
||||
pub message: String,
|
||||
pub hints: Vec<DiagnosticHint>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
|
@ -15,7 +18,8 @@ pub enum DiagnosticType {
|
|||
Error,
|
||||
Warning,
|
||||
Help,
|
||||
SecondaryError
|
||||
Hint,
|
||||
SecondaryError,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
|
@ -23,4 +27,22 @@ pub struct SpanWithLabel {
|
|||
pub span: Span,
|
||||
pub span_type: DiagnosticType,
|
||||
pub label: Option<String>,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct DiagnosticHint {
|
||||
pub hint_type: DiagnosticType,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
impl Diagnostic {
|
||||
pub fn with_hint(mut self, hint: DiagnosticHint) -> Self {
|
||||
self.hints.push(hint);
|
||||
Self {
|
||||
diag_type: self.diag_type,
|
||||
spans: self.spans,
|
||||
message: self.message,
|
||||
hints: self.hints,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||
pub struct Span {
|
||||
pub start: usize,
|
||||
pub end: usize
|
||||
pub end: usize,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn new(start: usize, end: usize) -> Self {
|
||||
Self { start, end }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use crate::diagnostics::Diagnostic;
|
||||
use crate::source::SourceFile;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum KError {
|
||||
InternalError(InternalError),
|
||||
UserError(Vec<Diagnostic>)
|
||||
UserError(Vec<Diagnostic>, SourceFile),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum InternalError {}
|
||||
pub enum InternalError {}
|
||||
|
|
11
libkabel/src/formatter.rs
Normal file
11
libkabel/src/formatter.rs
Normal file
|
@ -0,0 +1,11 @@
|
|||
use crate::lexer::token::Token;
|
||||
use std::fmt::Write;
|
||||
use tracing::debug;
|
||||
|
||||
pub fn print_tts(tokens: &[Token]) {
|
||||
let mut out = String::new();
|
||||
for token in tokens {
|
||||
write!(out, "{:?} ", token.tt).unwrap();
|
||||
}
|
||||
debug!("{out}");
|
||||
}
|
|
@ -1,225 +1,152 @@
|
|||
use token::{ArithOperator, Bracket, Literal, Statement, Token};
|
||||
use tracing::debug;
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticType, SpanWithLabel};
|
||||
use crate::diagnostics::span::Span;
|
||||
use crate::error::KError;
|
||||
use crate::lexer::token::{Token, TokenType};
|
||||
use crate::source::SourceFile;
|
||||
use crate::{diag, hint, label_span, span};
|
||||
|
||||
#[macro_use]
|
||||
pub mod token;
|
||||
|
||||
#[derive(Debug)]
|
||||
enum State {
|
||||
Stringing,
|
||||
Commenting,
|
||||
Numbering,
|
||||
BuildingToken,
|
||||
}
|
||||
pub fn lexer(mut source: SourceFile) -> Result<Vec<Token>, KError> {
|
||||
let mut tokens = vec![];
|
||||
let mut errors = vec![];
|
||||
|
||||
pub fn lexer(text_source: &str) -> Result<Vec<Token>, KError> {
|
||||
debug!("lexing!");
|
||||
'main: while let Some(c) = source.next() {
|
||||
match c {
|
||||
'(' => tokens.push(token!(at: source.pos(), TokenType::LeftParenthesis)),
|
||||
')' => tokens.push(token!(at: source.pos(), TokenType::RightParenthesis)),
|
||||
|
||||
let mut current_token = String::new();
|
||||
let mut lexed = Vec::new();
|
||||
let mut state: State = State::BuildingToken;
|
||||
'-' => tokens.push(token!(at: source.pos(), TokenType::Minus)),
|
||||
'+' => tokens.push(token!(at: source.pos(), TokenType::Plus)),
|
||||
'*' => tokens.push(token!(at: source.pos(), TokenType::Star)),
|
||||
|
||||
let mut chars = text_source.chars().peekable();
|
||||
',' => tokens.push(token!(at: source.pos(), TokenType::Comma)),
|
||||
|
||||
let mut pos: usize = 0;
|
||||
let mut span_start: usize = 0;
|
||||
';' => tokens.push(token!(at: source.pos(), TokenType::Semicolon)),
|
||||
':' => tokens.push(token!(at: source.pos(), TokenType::Colon)),
|
||||
'=' => tokens.push(token!(at: source.pos(), TokenType::Equals)),
|
||||
|
||||
while let Some(c) = chars.next() {
|
||||
pos += 1;
|
||||
n1 if n1.is_ascii_digit() => {
|
||||
source.start_token();
|
||||
let mut num_lit = String::from(n1);
|
||||
|
||||
match state {
|
||||
State::Commenting => {
|
||||
// Stop commenting at end of line
|
||||
if c == '\n' {
|
||||
state = State::BuildingToken;
|
||||
}
|
||||
}
|
||||
State::Stringing => {
|
||||
// If next char is an unescaped quote
|
||||
// TODO: when possible, make this 1 `if'. Ability to
|
||||
// do that remains unimplemented, hence the stupid copied
|
||||
// code below.
|
||||
if c != '\n' {
|
||||
if let Some(c_peek) = chars.peek() {
|
||||
if c != '\\' && *c_peek == '\"' {
|
||||
chars.next();
|
||||
pos += 1;
|
||||
|
||||
current_token.push(c);
|
||||
let tok_cpy = current_token.clone();
|
||||
lexed.push(Token::Literal(Span::new(span_start, pos), Literal::Str(tok_cpy)));
|
||||
|
||||
state = State::BuildingToken;
|
||||
current_token = String::new();
|
||||
} else {
|
||||
current_token.push(c);
|
||||
}
|
||||
while let Some(c) = source.peek() {
|
||||
if c.is_ascii_digit() {
|
||||
num_lit.push(source.next().expect("unreachable"))
|
||||
} else {
|
||||
return Err(KError::UserError(vec![
|
||||
Diagnostic {
|
||||
diag_type: DiagnosticType::Error,
|
||||
message: "unterminated string literal".to_string(),
|
||||
spans: vec![
|
||||
SpanWithLabel {
|
||||
span: Span::new(span_start-1, span_start-1),
|
||||
span_type: DiagnosticType::SecondaryError,
|
||||
label: Some("string began here".to_string())
|
||||
},
|
||||
SpanWithLabel {
|
||||
span: Span::new(pos, pos),
|
||||
span_type: DiagnosticType::Error,
|
||||
label: Some("expected end quote here".to_string())
|
||||
}
|
||||
]
|
||||
}
|
||||
]));
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
return Err(KError::UserError(vec![
|
||||
Diagnostic {
|
||||
diag_type: DiagnosticType::Error,
|
||||
message: "unterminated string literal".to_string(),
|
||||
spans: vec![
|
||||
SpanWithLabel {
|
||||
span: Span::new(span_start-1, span_start-1),
|
||||
span_type: DiagnosticType::SecondaryError,
|
||||
label: Some("string began here".to_string())
|
||||
},
|
||||
SpanWithLabel {
|
||||
span: Span::new(pos, pos),
|
||||
span_type: DiagnosticType::Error,
|
||||
label: Some("expected end quote here".to_string())
|
||||
}
|
||||
|
||||
if source.peek() == Some('.') {
|
||||
if let Some(two) = source.peek_two() {
|
||||
if two.is_ascii_digit() {
|
||||
num_lit.push(source.next().expect("unreachable"));
|
||||
while let Some(c) = source.peek() {
|
||||
if c.is_ascii_digit() {
|
||||
num_lit.push(source.next().expect("unreachable"))
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
]
|
||||
}
|
||||
]));
|
||||
}
|
||||
}
|
||||
State::Numbering => {
|
||||
current_token.push(c);
|
||||
// If next char isn't numeric, is at end of this number literal
|
||||
if let Some(c_peek) = chars.peek() {
|
||||
if !c_peek.is_ascii_digit() {
|
||||
let num = match current_token.parse::<f64>() {
|
||||
Ok(n) => n,
|
||||
Err(_) => {
|
||||
debug!("{} {}", span_start, pos);
|
||||
return Err(KError::UserError(vec![
|
||||
Diagnostic {
|
||||
message: "invalid numeric literal".to_string(),
|
||||
diag_type: DiagnosticType::Error,
|
||||
spans: vec![
|
||||
SpanWithLabel {
|
||||
span: Span::new(span_start, pos),
|
||||
span_type: DiagnosticType::Error,
|
||||
label: Some("this is not a valid numeric literal".to_string()),
|
||||
}
|
||||
],
|
||||
}
|
||||
]));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lexed.push(Token::Literal(
|
||||
Span::new(span_start, pos),
|
||||
Literal::Num(num)
|
||||
));
|
||||
state = State::BuildingToken;
|
||||
current_token = String::new();
|
||||
// try to parse the lit
|
||||
let val: f64 = match num_lit.parse() {
|
||||
Ok(val) => val,
|
||||
Err(_) => {
|
||||
errors.push(diag!(error, "Invalid numeric literal", label_span!(type: error, source.end_token(), "this is not a valid numeric literal")));
|
||||
break 'main;
|
||||
}
|
||||
};
|
||||
|
||||
tokens.push(token!(span: source.end_token(), TokenType::NumericLiteral(val)));
|
||||
}
|
||||
|
||||
c1 if c1.is_ascii_alphabetic() || c1 == '_' => {
|
||||
source.start_token();
|
||||
let mut ident = String::from(c1);
|
||||
|
||||
while let Some(c) = source.peek() {
|
||||
if c.is_ascii_alphanumeric() || c == '_' {
|
||||
ident.push(source.next().expect("unreachable"));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let span = source.end_token();
|
||||
|
||||
tokens.push(match ident.as_str() {
|
||||
"to" => token!(span: span, TokenType::To),
|
||||
"with" => token!(span: span, TokenType::With),
|
||||
"for" => token!(span: span, TokenType::For),
|
||||
"in" => token!(span: span, TokenType::In),
|
||||
_ => token!(span: span, TokenType::Identifier(ident)),
|
||||
})
|
||||
}
|
||||
|
||||
'\"' => {
|
||||
source.start_token();
|
||||
let mut string_lit = String::new();
|
||||
|
||||
while let Some(c) = source.peek() {
|
||||
if c != '\"' {
|
||||
string_lit.push(source.next().expect("unreachable"));
|
||||
} else {
|
||||
source.next(); // consume the "
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let span = source.end_token();
|
||||
|
||||
tokens.push(token!(span: span, TokenType::StringLiteral(string_lit)));
|
||||
}
|
||||
|
||||
'#' => {
|
||||
// read until end of line
|
||||
while let Some(c2) = source.next() {
|
||||
if c2 == '\n' {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
State::BuildingToken => {
|
||||
if c == '\"' {
|
||||
state = State::Stringing;
|
||||
current_token = String::new();
|
||||
// We don't need to push c because it's the quote delimiter,
|
||||
// which has already served its purpose as an indicator
|
||||
span_start = pos;
|
||||
continue;
|
||||
} else if c.is_ascii_digit() {
|
||||
state = State::Numbering;
|
||||
current_token = c.to_string();
|
||||
span_start = pos;
|
||||
continue;
|
||||
w if w.is_whitespace() => {}
|
||||
unknown => {
|
||||
let mut err = diag!(
|
||||
error,
|
||||
format!("Unexpected character `{unknown}`"),
|
||||
label_span!(type: error, span!(at: source.pos()), "here")
|
||||
);
|
||||
|
||||
if let Some(tkn) = tokens.last() {
|
||||
match tkn.tt {
|
||||
TokenType::NumericLiteral(_) => {
|
||||
err = err
|
||||
.with_hint(hint!(help: "last token detected was a numeric literal"))
|
||||
.with_hint(
|
||||
hint!(help: "this is most likely an invalid numeric literal"),
|
||||
);
|
||||
if unknown == '.' {
|
||||
err = err.with_hint(hint!(hint: "unexpected character was a `.`, did you forget to add a 0 at the end (e.g. `5.` instead of `5.0`)?"));
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Known meaningful tokens
|
||||
|
||||
if current_token.is_empty() {
|
||||
span_start = pos;
|
||||
}
|
||||
|
||||
current_token.push(c);
|
||||
|
||||
match current_token.as_str() {
|
||||
"\n" => {
|
||||
continue;
|
||||
}
|
||||
"#" => {
|
||||
state = State::Commenting;
|
||||
current_token = String::new();
|
||||
}
|
||||
";" => {
|
||||
lexed.push(Token::Terminator(Span::new(span_start, pos)));
|
||||
current_token = String::new();
|
||||
}
|
||||
"if " => {
|
||||
lexed.push(Token::Statement(Span::new(span_start, pos), Statement::Conditional));
|
||||
current_token = String::new();
|
||||
}
|
||||
"to " => {
|
||||
lexed.push(Token::Statement(Span::new(span_start, pos), Statement::FunctionDef));
|
||||
current_token = String::new();
|
||||
}
|
||||
"for " => {
|
||||
lexed.push(Token::Statement(Span::new(span_start, pos), Statement::ForLoop));
|
||||
current_token = String::new();
|
||||
}
|
||||
"while " => {
|
||||
lexed.push(Token::Statement(Span::new(span_start, pos), Statement::WhileLoop));
|
||||
current_token = String::new();
|
||||
}
|
||||
"(" => {
|
||||
lexed.push(Token::Bracket(Span::new(span_start, pos), Bracket::Open));
|
||||
current_token = String::new();
|
||||
}
|
||||
")" => {
|
||||
lexed.push(Token::Bracket(Span::new(span_start, pos), Bracket::Close));
|
||||
current_token = String::new();
|
||||
}
|
||||
"*" => {
|
||||
lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Multiply));
|
||||
current_token = String::new();
|
||||
}
|
||||
"/" => {
|
||||
lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Divide));
|
||||
current_token = String::new();
|
||||
}
|
||||
"+" => {
|
||||
lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Add));
|
||||
current_token = String::new();
|
||||
}
|
||||
"-" => {
|
||||
lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Subtract));
|
||||
current_token = String::new();
|
||||
}
|
||||
"^" => {
|
||||
lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Exponentiate));
|
||||
current_token = String::new();
|
||||
}
|
||||
"%" => {
|
||||
lexed.push(Token::ArithOperator(Span::new(span_start, pos), ArithOperator::Reduce));
|
||||
current_token = String::new();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
errors.push(err);
|
||||
// fatal error - exit loop
|
||||
break;
|
||||
}
|
||||
}
|
||||
debug!("{} {:?} {} {:?}", &c, &state, ¤t_token, &lexed);
|
||||
}
|
||||
|
||||
Ok(lexed)
|
||||
if !errors.is_empty() {
|
||||
Err(KError::UserError(errors, source))
|
||||
} else {
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,51 +1,82 @@
|
|||
use crate::diagnostics::span::Span;
|
||||
use crate::variables;
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
// parts of Token
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ArithOperator {
|
||||
Add,
|
||||
Subtract,
|
||||
Multiply,
|
||||
Divide,
|
||||
Exponentiate,
|
||||
Reduce,
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Token {
|
||||
pub tt: TokenType,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BooleanOperator {
|
||||
And,
|
||||
Or,
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum TokenType {
|
||||
LeftParenthesis, // (
|
||||
RightParenthesis, // )
|
||||
Comma, // ,
|
||||
|
||||
Minus, // -
|
||||
Plus, // +
|
||||
Star, // *
|
||||
|
||||
Semicolon,
|
||||
Colon,
|
||||
Equals,
|
||||
|
||||
To,
|
||||
With,
|
||||
For,
|
||||
In,
|
||||
|
||||
Identifier(String),
|
||||
NumericLiteral(f64),
|
||||
StringLiteral(String),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Literal {
|
||||
Str(String),
|
||||
Num(f64),
|
||||
impl Display for Token {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.tt)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Statement {
|
||||
Conditional,
|
||||
ForLoop,
|
||||
WhileLoop,
|
||||
FunctionDef,
|
||||
impl Display for TokenType {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
TokenType::LeftParenthesis => write!(f, "("),
|
||||
TokenType::RightParenthesis => write!(f, ")"),
|
||||
TokenType::Comma => write!(f, ","),
|
||||
TokenType::Minus => write!(f, "-"),
|
||||
TokenType::Plus => write!(f, "+"),
|
||||
TokenType::Star => write!(f, "*"),
|
||||
TokenType::Semicolon => write!(f, ";\n"),
|
||||
TokenType::To => write!(f, "to"),
|
||||
TokenType::Identifier(i) => write!(f, "{i}"),
|
||||
TokenType::Colon => write!(f, ":"),
|
||||
TokenType::Equals => write!(f, "="),
|
||||
TokenType::NumericLiteral(val) => write!(f, "{val}"),
|
||||
TokenType::StringLiteral(val) => write!(f, "\"{val}\""),
|
||||
TokenType::With => write!(f, "with"),
|
||||
TokenType::For => write!(f, "for"),
|
||||
TokenType::In => write!(f, "in"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Bracket {
|
||||
Open,
|
||||
Close,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Token {
|
||||
Literal(Span, Literal),
|
||||
ArithOperator(Span, ArithOperator),
|
||||
Statement(Span, Statement),
|
||||
Bracket(Span, Bracket),
|
||||
Variable(Span, variables::Variable),
|
||||
Terminator(Span),
|
||||
|
||||
macro_rules! token {
|
||||
(start: $start:expr, end: $end:expr, $tt:expr) => {
|
||||
$crate::lexer::token::Token {
|
||||
tt: $tt,
|
||||
span: $crate::diagnostics::span::Span::new($start, $end),
|
||||
}
|
||||
};
|
||||
(at: $at:expr, $tt:expr) => {
|
||||
$crate::lexer::token::Token {
|
||||
tt: $tt,
|
||||
span: $crate::diagnostics::span::Span::new($at, $at),
|
||||
}
|
||||
};
|
||||
(span: $span:expr, $tt:expr) => {
|
||||
$crate::lexer::token::Token {
|
||||
tt: $tt,
|
||||
span: $span,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
pub mod modules;
|
||||
pub mod variables;
|
||||
pub mod diagnostics;
|
||||
pub mod error;
|
||||
pub mod formatter;
|
||||
pub mod lexer;
|
||||
pub mod error;
|
||||
pub mod modules;
|
||||
pub mod source;
|
||||
pub mod variables;
|
||||
|
|
76
libkabel/src/source.rs
Normal file
76
libkabel/src/source.rs
Normal file
|
@ -0,0 +1,76 @@
|
|||
use crate::diagnostics::span::Span;
|
||||
|
||||
#[derive(PartialEq, Eq, Clone, Debug)]
|
||||
pub struct SourceFile {
|
||||
inner_original: String,
|
||||
pos: usize,
|
||||
current_token_start_pos: Option<usize>,
|
||||
name: String,
|
||||
}
|
||||
impl SourceFile {
|
||||
pub fn new(source: String, name: String) -> Self {
|
||||
SourceFile {
|
||||
inner_original: source,
|
||||
pos: 0,
|
||||
current_token_start_pos: None,
|
||||
name,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Option<char> {
|
||||
self.pos += 1;
|
||||
self.inner_original.chars().nth(self.pos - 1)
|
||||
}
|
||||
|
||||
pub fn peek(&self) -> Option<char> {
|
||||
self.inner_original.chars().nth(self.pos)
|
||||
}
|
||||
pub fn peek_two(&self) -> Option<char> {
|
||||
self.inner_original.chars().nth(self.pos + 1)
|
||||
}
|
||||
|
||||
pub fn pos(&self) -> usize {
|
||||
self.pos
|
||||
}
|
||||
|
||||
pub fn start_token(&mut self) {
|
||||
self.current_token_start_pos = Some(self.pos)
|
||||
}
|
||||
pub fn end_token(&mut self) -> Span {
|
||||
if let Some(start) = self.current_token_start_pos {
|
||||
self.current_token_start_pos = None;
|
||||
Span::new(start, self.pos)
|
||||
} else {
|
||||
panic!("tried to end a token when none was started");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn position_of(&self, pos: &usize) -> (usize, usize) {
|
||||
let mut line = 0;
|
||||
let mut col = 0;
|
||||
for (n, c) in self.inner_original.chars().enumerate() {
|
||||
if c == '\n' {
|
||||
line += 1;
|
||||
col = 0;
|
||||
} else {
|
||||
col += 1;
|
||||
}
|
||||
if n == *pos {
|
||||
break;
|
||||
}
|
||||
}
|
||||
(line, col)
|
||||
}
|
||||
|
||||
pub fn span_position(&self, span: &Span) -> ((usize, usize), (usize, usize)) {
|
||||
(self.position_of(&span.start), self.position_of(&span.end))
|
||||
}
|
||||
|
||||
pub fn line_at(&self, line: usize) -> Option<&str> {
|
||||
self.inner_original.split('\n').nth(line)
|
||||
}
|
||||
|
||||
pub fn filename(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue