From 160b64427d79290a59ac48c9babca064232d8dfd Mon Sep 17 00:00:00 2001 From: Tolmachev Igor Date: Sat, 9 May 2026 20:47:04 +0300 Subject: Make project structure more consistent --- compiler/src/ast/mod.rs | 34 +--- compiler/src/ast/models.rs | 36 +++++ compiler/src/ast/parser.rs | 12 +- compiler/src/ast/tests.rs | 18 +-- compiler/src/lex/lexer.rs | 161 +++++++++++++++++++ compiler/src/lex/mod.rs | 8 + compiler/src/lex/tests.rs | 383 ++++++++++++++++++++++++++++++++++++++++++++ compiler/src/lex/token.rs | 10 ++ compiler/src/lexer/mod.rs | 172 -------------------- compiler/src/lexer/tests.rs | 383 -------------------------------------------- compiler/src/lib.rs | 2 +- 11 files changed, 614 insertions(+), 605 deletions(-) create mode 100644 compiler/src/ast/models.rs create mode 100644 compiler/src/lex/lexer.rs create mode 100644 compiler/src/lex/mod.rs create mode 100644 compiler/src/lex/tests.rs create mode 100644 compiler/src/lex/token.rs delete mode 100644 compiler/src/lexer/mod.rs delete mode 100644 compiler/src/lexer/tests.rs (limited to 'compiler/src') diff --git a/compiler/src/ast/mod.rs b/compiler/src/ast/mod.rs index 8e35baf..2a0be03 100644 --- a/compiler/src/ast/mod.rs +++ b/compiler/src/ast/mod.rs @@ -1,40 +1,10 @@ mod error; +mod models; mod parser; -use std::rc::Rc; - -use crate::span::Spanned; pub use error::Error; +pub use models::{Ast, Atom, Expr}; pub use parser::Parser; #[cfg(test)] mod tests; - -#[derive(Clone, Debug, PartialEq)] -pub enum Atom { - Float(f64), - Integer(i64), - String(Rc), - Symbol(Rc), - Bool(bool), - Nil, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Expr { - Atom(Atom), - List(Vec>), -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Program(Vec>); - -impl Program { - pub fn inner(&self) -> &[Spanned] { - &self.0 - } - - pub fn into_inner(self) -> Vec> { - self.0 - } -} diff --git a/compiler/src/ast/models.rs b/compiler/src/ast/models.rs new file mode 100644 index 0000000..db9728d --- /dev/null +++ b/compiler/src/ast/models.rs @@ -0,0 +1,36 @@ +use std::rc::Rc; + +use crate::span::Spanned; + +#[derive(Clone, Debug, PartialEq)] +pub enum Atom { + Float(f64), + Integer(i64), + String(Rc), + Symbol(Rc), + Bool(bool), + Nil, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Expr { + Atom(Atom), + List(Vec>), +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Ast(Vec>); + +impl Ast { + pub fn new(ast: Vec>) -> Self { + Self(ast) + } + + pub fn inner(&self) -> &[Spanned] { + &self.0 + } + + pub fn into_inner(self) -> Vec> { + self.0 + } +} diff --git a/compiler/src/ast/parser.rs b/compiler/src/ast/parser.rs index 263e5b7..33b36be 100644 --- a/compiler/src/ast/parser.rs +++ b/compiler/src/ast/parser.rs @@ -1,8 +1,8 @@ use std::iter::Peekable; use crate::{ - ast::{Atom, Error, Expr, Program}, - lexer::Token, + ast::{Ast, Atom, Error, Expr}, + lex::Token, span::{Pos, Span, Spanned}, }; @@ -174,13 +174,13 @@ where Err(Spanned::new(Error::UnclosedLeftPar, left_par_span)) } - pub fn parse(mut self) -> Result> { - let mut program = Vec::new(); + pub fn parse(mut self) -> Result> { + let mut ast = Vec::new(); while self.peek().is_some() { - program.push(self.parse_expr()?) + ast.push(self.parse_expr()?) } - Ok(Program(program)) + Ok(Ast::new(ast)) } } diff --git a/compiler/src/ast/tests.rs b/compiler/src/ast/tests.rs index c6d8c38..8905427 100644 --- a/compiler/src/ast/tests.rs +++ b/compiler/src/ast/tests.rs @@ -1,14 +1,10 @@ -use std::f64; -use std::fmt::Debug; -use std::iter::repeat_n; -use std::rc::Rc; - -use self::E::*; -use super::{Error, Parser, parser::MAX_DEPTH}; -use crate::ast::{Atom, Expr}; -use crate::lexer::Token; -use crate::lexer::Token::*; -use crate::span::{Pos, Span, Spanned}; +use std::{fmt::Debug, iter::repeat_n, rc::Rc}; + +use crate::{ + ast::{Atom, Error, Expr, Parser, parser::MAX_DEPTH, tests::E::*}, + lex::Token::{self, *}, + span::{Pos, Span, Spanned}, +}; #[derive(Debug, PartialEq)] enum E { diff --git a/compiler/src/lex/lexer.rs b/compiler/src/lex/lexer.rs new file mode 100644 index 0000000..801d382 --- /dev/null +++ b/compiler/src/lex/lexer.rs @@ -0,0 +1,161 @@ +use crate::{ + lex::Token, + span::{Pos, Span, Spanned}, +}; + +fn is_terminator(ch: char) -> bool { + ch.is_whitespace() || matches!(ch, '(' | ')' | '\'' | '"' | ';') +} + +pub struct Lexer<'a> { + input: &'a str, + cursor: usize, + + line: usize, + column: usize, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Self { + Self { + input, + cursor: 0, + + line: 1, + column: 0, + } + } + + fn rest(&self) -> &str { + &self.input[self.cursor..] + } + + fn peek(&self) -> Option { + self.rest().chars().next() + } + + fn peek_nth(&self, n: usize) -> Option { + self.rest().chars().nth(n) + } + + fn consume(&mut self) -> Option { + let ch = self.peek()?; + + self.cursor += ch.len_utf8(); + if ch == '\n' { + self.line += 1; + self.column = 0; + } else { + self.column += 1; + } + + Some(ch) + } + + fn next_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str { + let start = self.cursor; + + while let Some(ch) = self.peek() { + if !predicate(ch) { + break; + } + self.consume(); + } + + &self.input[start..self.cursor] + } + + fn next_atom(&mut self) -> &'a str { + self.next_while(|ch| !is_terminator(ch)) + } + + fn next_string(&mut self) -> Result<&'a str, &'a str> { + debug_assert_eq!(self.peek(), Some('"')); + self.consume(); + + let start = self.cursor; + + while let Some(ch) = self.peek() { + match ch { + '"' => { + let string = &self.input[start..self.cursor]; + self.consume(); + return Ok(string); + } + '\n' => { + let string = &self.input[start..self.cursor]; + self.consume(); + return Err(string); + } + '\\' => { + self.consume(); + self.consume(); + } + _ => { + self.consume(); + } + } + } + + Err(&self.input[start..self.cursor]) + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Spanned>; + + fn next(&mut self) -> Option { + loop { + match self.peek()? { + ch if ch.is_whitespace() => { + self.next_while(char::is_whitespace); + } + ';' => { + self.next_while(|ch| ch != '\n'); + } + _ => break, + } + } + + let start = Pos::new(self.line, self.column, self.cursor); + + let token = match self.peek()? { + '(' => { + self.consume(); + Token::LeftPar + } + ')' => { + self.consume(); + Token::RightPar + } + '\'' => { + self.consume(); + Token::Quote + } + + // Number + ch if ch.is_ascii_digit() + || ch == '.' && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) + || matches!(ch, '+' | '-') + && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) + || matches!(ch, '+' | '-') + && self.peek_nth(1).is_some_and(|ch| ch == '.') + && self.peek_nth(2).is_some_and(|ch| ch.is_ascii_digit()) => + { + Token::Number(self.next_atom()) + } + + // String + '"' => match self.next_string() { + Ok(string) => Token::String(string), + Err(string) => Token::UnclosedString(string), + }, + + // Symbol + _ => Token::Symbol(self.next_atom()), + }; + + let end = Pos::new(self.line, self.column, self.cursor); + Some(Spanned::new(token, Span::new(start, end))) + } +} diff --git a/compiler/src/lex/mod.rs b/compiler/src/lex/mod.rs new file mode 100644 index 0000000..7bc4440 --- /dev/null +++ b/compiler/src/lex/mod.rs @@ -0,0 +1,8 @@ +mod lexer; +mod token; + +pub use lexer::Lexer; +pub use token::Token; + +#[cfg(test)] +mod tests; diff --git a/compiler/src/lex/tests.rs b/compiler/src/lex/tests.rs new file mode 100644 index 0000000..2d872a2 --- /dev/null +++ b/compiler/src/lex/tests.rs @@ -0,0 +1,383 @@ +use crate::{ + lex::{Lexer, Token, Token::*}, + span::Pos, +}; + +fn tokenize<'a>(input: &'a str) -> Vec> { + Lexer::new(input).map(|s| s.inner).collect() +} + +#[test] +fn test_spaces() { + let cases = vec![ + ("", vec![]), + (" ", vec![]), + ("\n", vec![]), + ("\t\n \r\n", vec![]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_parens() { + let cases = vec![ + ("()", vec![LeftPar, RightPar]), + ("( )", vec![LeftPar, RightPar]), + ("(())", vec![LeftPar, LeftPar, RightPar, RightPar]), + ( + "((()))", + vec![LeftPar, LeftPar, LeftPar, RightPar, RightPar, RightPar], + ), + (")(", vec![RightPar, LeftPar]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_quote() { + let cases = vec![ + ("'", vec![Quote]), + ("'a", vec![Quote, Symbol("a")]), + ("''a", vec![Quote, Quote, Symbol("a")]), + ("'()", vec![Quote, LeftPar, RightPar]), + ( + "'(1 2)", + vec![Quote, LeftPar, Number("1"), Number("2"), RightPar], + ), + ("(' )", vec![LeftPar, Quote, RightPar]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_numbers() { + let cases = vec![ + ("0", vec![Number("0")]), + ("42", vec![Number("42")]), + ("3.14", vec![Number("3.14")]), + ("-7", vec![Number("-7")]), + ("+5", vec![Number("+5")]), + ("-0.5", vec![Number("-0.5")]), + ("1e10", vec![Number("1e10")]), + ("1.5e-3", vec![Number("1.5e-3")]), + (".5", vec![Number(".5")]), + ("-.5", vec![Number("-.5")]), + ("+.5", vec![Number("+.5")]), + ("-.0", vec![Number("-.0")]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_strings() { + let cases = vec![ + (r#""""#, vec![String("")]), + (r#""hello""#, vec![String("hello")]), + (r#""hello world""#, vec![String("hello world")]), + (r#""(not a list)""#, vec![String("(not a list)")]), + (r#""'not a quote""#, vec![String("'not a quote")]), + (r#""; not a comment""#, vec![String("; not a comment")]), + (r#"" spaces ""#, vec![String(" spaces ")]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_string_escapes() { + let cases = vec![ + (r#""line\nbreak""#, vec![String(r"line\nbreak")]), + (r#""with \"quotes\"""#, vec![String(r#"with \"quotes\""#)]), + (r#""\\""#, vec![String(r"\\")]), + ("\"single\\\nline\"", vec![String("single\\\nline")]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_unclosed_strings() { + let cases = vec![ + (r#""abc"#, vec![UnclosedString("abc")]), + (r#""abc\""#, vec![UnclosedString(r#"abc\""#)]), + ("\"abc\n", vec![UnclosedString("abc")]), + ("\"abc\\\ndef", vec![UnclosedString("abc\\\ndef")]), + ("\"abc\n\"def\"", vec![UnclosedString("abc"), String("def")]), + (r#"""#, vec![UnclosedString("")]), + ("\"\n\"", vec![UnclosedString(""), UnclosedString("")]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_symbols() { + let cases = vec![ + ("foo", vec![Symbol("foo")]), + ("foo-bar", vec![Symbol("foo-bar")]), + ("foo!", vec![Symbol("foo!")]), + ("empty?", vec![Symbol("empty?")]), + ("set!", vec![Symbol("set!")]), + ("->", vec![Symbol("->")]), + ("+", vec![Symbol("+")]), + ("-", vec![Symbol("-")]), + ("*", vec![Symbol("*")]), + ("/", vec![Symbol("/")]), + ("=", vec![Symbol("=")]), + ("<=", vec![Symbol("<=")]), + (">=", vec![Symbol(">=")]), + ("a1b2", vec![Symbol("a1b2")]), + ("x", vec![Symbol("x")]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_ambiguous() { + let cases = vec![ + ("-x", vec![Symbol("-x")]), + ("+foo", vec![Symbol("+foo")]), + ("...", vec![Symbol("...")]), + (".foo", vec![Symbol(".foo")]), + ("-.", vec![Symbol("-.")]), + ("+.", vec![Symbol("+.")]), + (".", vec![Symbol(".")]), + ("+.a", vec![Symbol("+.a")]), + ("-.a", vec![Symbol("-.a")]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_no_separators() { + let cases = vec![ + ("(foo)", vec![LeftPar, Symbol("foo"), RightPar]), + ("(1)", vec![LeftPar, Number("1"), RightPar]), + ("(a)b", vec![LeftPar, Symbol("a"), RightPar, Symbol("b")]), + ("'(a)", vec![Quote, LeftPar, Symbol("a"), RightPar]), + (r#"("s")"#, vec![LeftPar, String("s"), RightPar]), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_whitespace_separators() { + let cases = vec![ + ( + "(\n foo\n bar\n)", + vec![LeftPar, Symbol("foo"), Symbol("bar"), RightPar], + ), + ( + "(\tfoo\tbar\t)", + vec![LeftPar, Symbol("foo"), Symbol("bar"), RightPar], + ), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_expressions() { + let cases = vec![ + ( + "(define x 42)", + vec![ + LeftPar, + Symbol("define"), + Symbol("x"), + Number("42"), + RightPar, + ], + ), + ( + "(+ 1 2)", + vec![LeftPar, Symbol("+"), Number("1"), Number("2"), RightPar], + ), + ( + "(if (= x 0) 'zero 'nonzero)", + vec![ + LeftPar, + Symbol("if"), + LeftPar, + Symbol("="), + Symbol("x"), + Number("0"), + RightPar, + Quote, + Symbol("zero"), + Quote, + Symbol("nonzero"), + RightPar, + ], + ), + ( + r#"(print "hello, world")"#, + vec![LeftPar, Symbol("print"), String("hello, world"), RightPar], + ), + ( + "(lambda (x) (* x x))", + vec![ + LeftPar, + Symbol("lambda"), + LeftPar, + Symbol("x"), + RightPar, + LeftPar, + Symbol("*"), + Symbol("x"), + Symbol("x"), + RightPar, + RightPar, + ], + ), + ( + "'(1 2 3)", + vec![ + Quote, + LeftPar, + Number("1"), + Number("2"), + Number("3"), + RightPar, + ], + ), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +#[test] +fn test_comments() { + let cases = vec![ + (";", vec![]), + (";\n", vec![]), + ("; comment", vec![]), + ("; comment\n", vec![]), + ("; comment\n42", vec![Number("42")]), + ("42 ; comment", vec![Number("42")]), + ("42; comment", vec![Number("42")]), + ( + "(+ 1 2) ; calc\n(- 3 4)", + vec![ + LeftPar, + Symbol("+"), + Number("1"), + Number("2"), + RightPar, + LeftPar, + Symbol("-"), + Number("3"), + Number("4"), + RightPar, + ], + ), + ]; + for (code, tokens) in cases { + assert_eq!(tokenize(code), tokens); + } +} + +fn spans(input: &str) -> Vec<(Pos, Pos)> { + Lexer::new(input) + .map(|s| (s.span.start, s.span.end)) + .collect() +} + +#[test] +fn test_span_single_char() { + let s = spans("("); + assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 1, 1))]); +} + +#[test] +fn test_span_after_leading_whitespace() { + let s = spans(" ("); + assert_eq!(s, vec![(Pos::new(1, 3, 3), Pos::new(1, 4, 4))]); +} + +#[test] +fn test_span_after_newline() { + let s = spans("\n("); + assert_eq!(s, vec![(Pos::new(2, 0, 1), Pos::new(2, 1, 2))]); +} + +#[test] +fn test_span_multi_char() { + let s = spans("foo"); + assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 3, 3))]); +} + +#[test] +fn test_span_string() { + let s = spans(r#""hi""#); + assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 4, 4))]); +} + +#[test] +fn test_span_sequence() { + // (foo 42) + // 012345678 + let s = spans("(foo 42)"); + assert_eq!( + s, + vec![ + (Pos::new(1, 0, 0), Pos::new(1, 1, 1)), // ( + (Pos::new(1, 1, 1), Pos::new(1, 4, 4)), // foo + (Pos::new(1, 5, 5), Pos::new(1, 7, 7)), // 42 + (Pos::new(1, 7, 7), Pos::new(1, 8, 8)), // ) + ], + ); +} + +#[test] +fn test_span_lines() { + let s = spans("foo\nbar"); + assert_eq!( + s, + vec![ + (Pos::new(1, 0, 0), Pos::new(1, 3, 3)), + (Pos::new(2, 0, 4), Pos::new(2, 3, 7)), + ], + ); +} + +#[test] +fn test_span_after_comment() { + // ; cm\nfoo + // 01234 5678 + let s = spans("; cm\nfoo"); + assert_eq!(s, vec![(Pos::new(2, 0, 5), Pos::new(2, 3, 8))]); +} + +#[test] +fn test_span_after_quote() { + // 'hello + // 0123456 + let s = spans("'hello"); + assert_eq!( + s, + vec![ + (Pos::new(1, 0, 0), Pos::new(1, 1, 1)), + (Pos::new(1, 1, 1), Pos::new(1, 6, 6)) + ] + ); +} diff --git a/compiler/src/lex/token.rs b/compiler/src/lex/token.rs new file mode 100644 index 0000000..2d07885 --- /dev/null +++ b/compiler/src/lex/token.rs @@ -0,0 +1,10 @@ +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Token<'a> { + LeftPar, + RightPar, + Quote, + Number(&'a str), + String(&'a str), + UnclosedString(&'a str), + Symbol(&'a str), +} diff --git a/compiler/src/lexer/mod.rs b/compiler/src/lexer/mod.rs deleted file mode 100644 index f3c8b76..0000000 --- a/compiler/src/lexer/mod.rs +++ /dev/null @@ -1,172 +0,0 @@ -use crate::span::{Pos, Span, Spanned}; - -#[cfg(test)] -mod tests; - -fn is_terminator(ch: char) -> bool { - ch.is_whitespace() || matches!(ch, '(' | ')' | '\'' | '"' | ';') -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Token<'a> { - LeftPar, - RightPar, - Quote, - Number(&'a str), - String(&'a str), - UnclosedString(&'a str), - Symbol(&'a str), -} - -pub struct Lexer<'a> { - input: &'a str, - cursor: usize, - - line: usize, - column: usize, -} - -impl<'a> Lexer<'a> { - pub fn new(input: &'a str) -> Self { - Self { - input, - cursor: 0, - - line: 1, - column: 0, - } - } - - fn rest(&self) -> &str { - &self.input[self.cursor..] - } - - fn peek(&self) -> Option { - self.rest().chars().next() - } - - fn peek_nth(&self, n: usize) -> Option { - self.rest().chars().nth(n) - } - - fn consume(&mut self) -> Option { - let ch = self.peek()?; - - self.cursor += ch.len_utf8(); - if ch == '\n' { - self.line += 1; - self.column = 0; - } else { - self.column += 1; - } - - Some(ch) - } - - fn next_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str { - let start = self.cursor; - - while let Some(ch) = self.peek() { - if !predicate(ch) { - break; - } - self.consume(); - } - - &self.input[start..self.cursor] - } - - fn next_atom(&mut self) -> &'a str { - self.next_while(|ch| !is_terminator(ch)) - } - - fn next_string(&mut self) -> Result<&'a str, &'a str> { - debug_assert_eq!(self.peek(), Some('"')); - self.consume(); - - let start = self.cursor; - - while let Some(ch) = self.peek() { - match ch { - '"' => { - let string = &self.input[start..self.cursor]; - self.consume(); - return Ok(string); - } - '\n' => { - let string = &self.input[start..self.cursor]; - self.consume(); - return Err(string); - } - '\\' => { - self.consume(); - self.consume(); - } - _ => { - self.consume(); - } - } - } - - Err(&self.input[start..self.cursor]) - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = Spanned>; - - fn next(&mut self) -> Option { - loop { - match self.peek()? { - ch if ch.is_whitespace() => { - self.next_while(char::is_whitespace); - } - ';' => { - self.next_while(|ch| ch != '\n'); - } - _ => break, - } - } - - let start = Pos::new(self.line, self.column, self.cursor); - - let token = match self.peek()? { - '(' => { - self.consume(); - Token::LeftPar - } - ')' => { - self.consume(); - Token::RightPar - } - '\'' => { - self.consume(); - Token::Quote - } - - // Number - ch if ch.is_ascii_digit() - || ch == '.' && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) - || matches!(ch, '+' | '-') - && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) - || matches!(ch, '+' | '-') - && self.peek_nth(1).is_some_and(|ch| ch == '.') - && self.peek_nth(2).is_some_and(|ch| ch.is_ascii_digit()) => - { - Token::Number(self.next_atom()) - } - - // String - '"' => match self.next_string() { - Ok(string) => Token::String(string), - Err(string) => Token::UnclosedString(string), - }, - - // Symbol - _ => Token::Symbol(self.next_atom()), - }; - - let end = Pos::new(self.line, self.column, self.cursor); - Some(Spanned::new(token, Span::new(start, end))) - } -} diff --git a/compiler/src/lexer/tests.rs b/compiler/src/lexer/tests.rs deleted file mode 100644 index 6f96c65..0000000 --- a/compiler/src/lexer/tests.rs +++ /dev/null @@ -1,383 +0,0 @@ -use crate::span::Pos; - -use super::Token::*; -use super::{Lexer, Token}; - -fn tokenize<'a>(input: &'a str) -> Vec> { - Lexer::new(input).map(|s| s.inner).collect() -} - -#[test] -fn test_spaces() { - let cases = vec![ - ("", vec![]), - (" ", vec![]), - ("\n", vec![]), - ("\t\n \r\n", vec![]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_parens() { - let cases = vec![ - ("()", vec![LeftPar, RightPar]), - ("( )", vec![LeftPar, RightPar]), - ("(())", vec![LeftPar, LeftPar, RightPar, RightPar]), - ( - "((()))", - vec![LeftPar, LeftPar, LeftPar, RightPar, RightPar, RightPar], - ), - (")(", vec![RightPar, LeftPar]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_quote() { - let cases = vec![ - ("'", vec![Quote]), - ("'a", vec![Quote, Symbol("a")]), - ("''a", vec![Quote, Quote, Symbol("a")]), - ("'()", vec![Quote, LeftPar, RightPar]), - ( - "'(1 2)", - vec![Quote, LeftPar, Number("1"), Number("2"), RightPar], - ), - ("(' )", vec![LeftPar, Quote, RightPar]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_numbers() { - let cases = vec![ - ("0", vec![Number("0")]), - ("42", vec![Number("42")]), - ("3.14", vec![Number("3.14")]), - ("-7", vec![Number("-7")]), - ("+5", vec![Number("+5")]), - ("-0.5", vec![Number("-0.5")]), - ("1e10", vec![Number("1e10")]), - ("1.5e-3", vec![Number("1.5e-3")]), - (".5", vec![Number(".5")]), - ("-.5", vec![Number("-.5")]), - ("+.5", vec![Number("+.5")]), - ("-.0", vec![Number("-.0")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_strings() { - let cases = vec![ - (r#""""#, vec![String("")]), - (r#""hello""#, vec![String("hello")]), - (r#""hello world""#, vec![String("hello world")]), - (r#""(not a list)""#, vec![String("(not a list)")]), - (r#""'not a quote""#, vec![String("'not a quote")]), - (r#""; not a comment""#, vec![String("; not a comment")]), - (r#"" spaces ""#, vec![String(" spaces ")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_string_escapes() { - let cases = vec![ - (r#""line\nbreak""#, vec![String(r"line\nbreak")]), - (r#""with \"quotes\"""#, vec![String(r#"with \"quotes\""#)]), - (r#""\\""#, vec![String(r"\\")]), - ("\"single\\\nline\"", vec![String("single\\\nline")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_unclosed_strings() { - let cases = vec![ - (r#""abc"#, vec![UnclosedString("abc")]), - (r#""abc\""#, vec![UnclosedString(r#"abc\""#)]), - ("\"abc\n", vec![UnclosedString("abc")]), - ("\"abc\\\ndef", vec![UnclosedString("abc\\\ndef")]), - ("\"abc\n\"def\"", vec![UnclosedString("abc"), String("def")]), - (r#"""#, vec![UnclosedString("")]), - ("\"\n\"", vec![UnclosedString(""), UnclosedString("")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_symbols() { - let cases = vec![ - ("foo", vec![Symbol("foo")]), - ("foo-bar", vec![Symbol("foo-bar")]), - ("foo!", vec![Symbol("foo!")]), - ("empty?", vec![Symbol("empty?")]), - ("set!", vec![Symbol("set!")]), - ("->", vec![Symbol("->")]), - ("+", vec![Symbol("+")]), - ("-", vec![Symbol("-")]), - ("*", vec![Symbol("*")]), - ("/", vec![Symbol("/")]), - ("=", vec![Symbol("=")]), - ("<=", vec![Symbol("<=")]), - (">=", vec![Symbol(">=")]), - ("a1b2", vec![Symbol("a1b2")]), - ("x", vec![Symbol("x")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_ambiguous() { - let cases = vec![ - ("-x", vec![Symbol("-x")]), - ("+foo", vec![Symbol("+foo")]), - ("...", vec![Symbol("...")]), - (".foo", vec![Symbol(".foo")]), - ("-.", vec![Symbol("-.")]), - ("+.", vec![Symbol("+.")]), - (".", vec![Symbol(".")]), - ("+.a", vec![Symbol("+.a")]), - ("-.a", vec![Symbol("-.a")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_no_separators() { - let cases = vec![ - ("(foo)", vec![LeftPar, Symbol("foo"), RightPar]), - ("(1)", vec![LeftPar, Number("1"), RightPar]), - ("(a)b", vec![LeftPar, Symbol("a"), RightPar, Symbol("b")]), - ("'(a)", vec![Quote, LeftPar, Symbol("a"), RightPar]), - (r#"("s")"#, vec![LeftPar, String("s"), RightPar]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_whitespace_separators() { - let cases = vec![ - ( - "(\n foo\n bar\n)", - vec![LeftPar, Symbol("foo"), Symbol("bar"), RightPar], - ), - ( - "(\tfoo\tbar\t)", - vec![LeftPar, Symbol("foo"), Symbol("bar"), RightPar], - ), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_expressions() { - let cases = vec![ - ( - "(define x 42)", - vec![ - LeftPar, - Symbol("define"), - Symbol("x"), - Number("42"), - RightPar, - ], - ), - ( - "(+ 1 2)", - vec![LeftPar, Symbol("+"), Number("1"), Number("2"), RightPar], - ), - ( - "(if (= x 0) 'zero 'nonzero)", - vec![ - LeftPar, - Symbol("if"), - LeftPar, - Symbol("="), - Symbol("x"), - Number("0"), - RightPar, - Quote, - Symbol("zero"), - Quote, - Symbol("nonzero"), - RightPar, - ], - ), - ( - r#"(print "hello, world")"#, - vec![LeftPar, Symbol("print"), String("hello, world"), RightPar], - ), - ( - "(lambda (x) (* x x))", - vec![ - LeftPar, - Symbol("lambda"), - LeftPar, - Symbol("x"), - RightPar, - LeftPar, - Symbol("*"), - Symbol("x"), - Symbol("x"), - RightPar, - RightPar, - ], - ), - ( - "'(1 2 3)", - vec![ - Quote, - LeftPar, - Number("1"), - Number("2"), - Number("3"), - RightPar, - ], - ), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_comments() { - let cases = vec![ - (";", vec![]), - (";\n", vec![]), - ("; comment", vec![]), - ("; comment\n", vec![]), - ("; comment\n42", vec![Number("42")]), - ("42 ; comment", vec![Number("42")]), - ("42; comment", vec![Number("42")]), - ( - "(+ 1 2) ; calc\n(- 3 4)", - vec![ - LeftPar, - Symbol("+"), - Number("1"), - Number("2"), - RightPar, - LeftPar, - Symbol("-"), - Number("3"), - Number("4"), - RightPar, - ], - ), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -fn spans(input: &str) -> Vec<(Pos, Pos)> { - Lexer::new(input) - .map(|s| (s.span.start, s.span.end)) - .collect() -} - -#[test] -fn test_span_single_char() { - let s = spans("("); - assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 1, 1))]); -} - -#[test] -fn test_span_after_leading_whitespace() { - let s = spans(" ("); - assert_eq!(s, vec![(Pos::new(1, 3, 3), Pos::new(1, 4, 4))]); -} - -#[test] -fn test_span_after_newline() { - let s = spans("\n("); - assert_eq!(s, vec![(Pos::new(2, 0, 1), Pos::new(2, 1, 2))]); -} - -#[test] -fn test_span_multi_char() { - let s = spans("foo"); - assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 3, 3))]); -} - -#[test] -fn test_span_string() { - let s = spans(r#""hi""#); - assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 4, 4))]); -} - -#[test] -fn test_span_sequence() { - // (foo 42) - // 012345678 - let s = spans("(foo 42)"); - assert_eq!( - s, - vec![ - (Pos::new(1, 0, 0), Pos::new(1, 1, 1)), // ( - (Pos::new(1, 1, 1), Pos::new(1, 4, 4)), // foo - (Pos::new(1, 5, 5), Pos::new(1, 7, 7)), // 42 - (Pos::new(1, 7, 7), Pos::new(1, 8, 8)), // ) - ], - ); -} - -#[test] -fn test_span_lines() { - let s = spans("foo\nbar"); - assert_eq!( - s, - vec![ - (Pos::new(1, 0, 0), Pos::new(1, 3, 3)), - (Pos::new(2, 0, 4), Pos::new(2, 3, 7)), - ], - ); -} - -#[test] -fn test_span_after_comment() { - // ; cm\nfoo - // 01234 5678 - let s = spans("; cm\nfoo"); - assert_eq!(s, vec![(Pos::new(2, 0, 5), Pos::new(2, 3, 8))]); -} - -#[test] -fn test_span_after_quote() { - // 'hello - // 0123456 - let s = spans("'hello"); - assert_eq!( - s, - vec![ - (Pos::new(1, 0, 0), Pos::new(1, 1, 1)), - (Pos::new(1, 1, 1), Pos::new(1, 6, 6)) - ] - ); -} diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index b9b7a46..80311b4 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,3 +1,3 @@ pub mod ast; -pub mod lexer; +pub mod lex; pub mod span; -- cgit v1.3