From 160b64427d79290a59ac48c9babca064232d8dfd Mon Sep 17 00:00:00 2001 From: Tolmachev Igor Date: Sat, 9 May 2026 20:47:04 +0300 Subject: Make project structure more consistent --- compiler/src/lexer/mod.rs | 172 -------------------- compiler/src/lexer/tests.rs | 383 -------------------------------------------- 2 files changed, 555 deletions(-) delete mode 100644 compiler/src/lexer/mod.rs delete mode 100644 compiler/src/lexer/tests.rs (limited to 'compiler/src/lexer') diff --git a/compiler/src/lexer/mod.rs b/compiler/src/lexer/mod.rs deleted file mode 100644 index f3c8b76..0000000 --- a/compiler/src/lexer/mod.rs +++ /dev/null @@ -1,172 +0,0 @@ -use crate::span::{Pos, Span, Spanned}; - -#[cfg(test)] -mod tests; - -fn is_terminator(ch: char) -> bool { - ch.is_whitespace() || matches!(ch, '(' | ')' | '\'' | '"' | ';') -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Token<'a> { - LeftPar, - RightPar, - Quote, - Number(&'a str), - String(&'a str), - UnclosedString(&'a str), - Symbol(&'a str), -} - -pub struct Lexer<'a> { - input: &'a str, - cursor: usize, - - line: usize, - column: usize, -} - -impl<'a> Lexer<'a> { - pub fn new(input: &'a str) -> Self { - Self { - input, - cursor: 0, - - line: 1, - column: 0, - } - } - - fn rest(&self) -> &str { - &self.input[self.cursor..] - } - - fn peek(&self) -> Option { - self.rest().chars().next() - } - - fn peek_nth(&self, n: usize) -> Option { - self.rest().chars().nth(n) - } - - fn consume(&mut self) -> Option { - let ch = self.peek()?; - - self.cursor += ch.len_utf8(); - if ch == '\n' { - self.line += 1; - self.column = 0; - } else { - self.column += 1; - } - - Some(ch) - } - - fn next_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str { - let start = self.cursor; - - while let Some(ch) = self.peek() { - if !predicate(ch) { - break; - } - self.consume(); - } - - &self.input[start..self.cursor] - } - - fn next_atom(&mut self) -> &'a str { - self.next_while(|ch| !is_terminator(ch)) - } - - fn next_string(&mut self) -> Result<&'a str, &'a str> { - debug_assert_eq!(self.peek(), Some('"')); - self.consume(); - - let start = self.cursor; - - while let Some(ch) = self.peek() { - match ch { - '"' => { - let string = &self.input[start..self.cursor]; - self.consume(); - return Ok(string); - } - '\n' => { - let string = &self.input[start..self.cursor]; - self.consume(); - return Err(string); - } - '\\' => { - self.consume(); - self.consume(); - } - _ => { - self.consume(); - } - } - } - - Err(&self.input[start..self.cursor]) - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = Spanned>; - - fn next(&mut self) -> Option { - loop { - match self.peek()? { - ch if ch.is_whitespace() => { - self.next_while(char::is_whitespace); - } - ';' => { - self.next_while(|ch| ch != '\n'); - } - _ => break, - } - } - - let start = Pos::new(self.line, self.column, self.cursor); - - let token = match self.peek()? { - '(' => { - self.consume(); - Token::LeftPar - } - ')' => { - self.consume(); - Token::RightPar - } - '\'' => { - self.consume(); - Token::Quote - } - - // Number - ch if ch.is_ascii_digit() - || ch == '.' && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) - || matches!(ch, '+' | '-') - && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) - || matches!(ch, '+' | '-') - && self.peek_nth(1).is_some_and(|ch| ch == '.') - && self.peek_nth(2).is_some_and(|ch| ch.is_ascii_digit()) => - { - Token::Number(self.next_atom()) - } - - // String - '"' => match self.next_string() { - Ok(string) => Token::String(string), - Err(string) => Token::UnclosedString(string), - }, - - // Symbol - _ => Token::Symbol(self.next_atom()), - }; - - let end = Pos::new(self.line, self.column, self.cursor); - Some(Spanned::new(token, Span::new(start, end))) - } -} diff --git a/compiler/src/lexer/tests.rs b/compiler/src/lexer/tests.rs deleted file mode 100644 index 6f96c65..0000000 --- a/compiler/src/lexer/tests.rs +++ /dev/null @@ -1,383 +0,0 @@ -use crate::span::Pos; - -use super::Token::*; -use super::{Lexer, Token}; - -fn tokenize<'a>(input: &'a str) -> Vec> { - Lexer::new(input).map(|s| s.inner).collect() -} - -#[test] -fn test_spaces() { - let cases = vec![ - ("", vec![]), - (" ", vec![]), - ("\n", vec![]), - ("\t\n \r\n", vec![]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_parens() { - let cases = vec![ - ("()", vec![LeftPar, RightPar]), - ("( )", vec![LeftPar, RightPar]), - ("(())", vec![LeftPar, LeftPar, RightPar, RightPar]), - ( - "((()))", - vec![LeftPar, LeftPar, LeftPar, RightPar, RightPar, RightPar], - ), - (")(", vec![RightPar, LeftPar]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_quote() { - let cases = vec![ - ("'", vec![Quote]), - ("'a", vec![Quote, Symbol("a")]), - ("''a", vec![Quote, Quote, Symbol("a")]), - ("'()", vec![Quote, LeftPar, RightPar]), - ( - "'(1 2)", - vec![Quote, LeftPar, Number("1"), Number("2"), RightPar], - ), - ("(' )", vec![LeftPar, Quote, RightPar]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_numbers() { - let cases = vec![ - ("0", vec![Number("0")]), - ("42", vec![Number("42")]), - ("3.14", vec![Number("3.14")]), - ("-7", vec![Number("-7")]), - ("+5", vec![Number("+5")]), - ("-0.5", vec![Number("-0.5")]), - ("1e10", vec![Number("1e10")]), - ("1.5e-3", vec![Number("1.5e-3")]), - (".5", vec![Number(".5")]), - ("-.5", vec![Number("-.5")]), - ("+.5", vec![Number("+.5")]), - ("-.0", vec![Number("-.0")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_strings() { - let cases = vec![ - (r#""""#, vec![String("")]), - (r#""hello""#, vec![String("hello")]), - (r#""hello world""#, vec![String("hello world")]), - (r#""(not a list)""#, vec![String("(not a list)")]), - (r#""'not a quote""#, vec![String("'not a quote")]), - (r#""; not a comment""#, vec![String("; not a comment")]), - (r#"" spaces ""#, vec![String(" spaces ")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_string_escapes() { - let cases = vec![ - (r#""line\nbreak""#, vec![String(r"line\nbreak")]), - (r#""with \"quotes\"""#, vec![String(r#"with \"quotes\""#)]), - (r#""\\""#, vec![String(r"\\")]), - ("\"single\\\nline\"", vec![String("single\\\nline")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_unclosed_strings() { - let cases = vec![ - (r#""abc"#, vec![UnclosedString("abc")]), - (r#""abc\""#, vec![UnclosedString(r#"abc\""#)]), - ("\"abc\n", vec![UnclosedString("abc")]), - ("\"abc\\\ndef", vec![UnclosedString("abc\\\ndef")]), - ("\"abc\n\"def\"", vec![UnclosedString("abc"), String("def")]), - (r#"""#, vec![UnclosedString("")]), - ("\"\n\"", vec![UnclosedString(""), UnclosedString("")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_symbols() { - let cases = vec![ - ("foo", vec![Symbol("foo")]), - ("foo-bar", vec![Symbol("foo-bar")]), - ("foo!", vec![Symbol("foo!")]), - ("empty?", vec![Symbol("empty?")]), - ("set!", vec![Symbol("set!")]), - ("->", vec![Symbol("->")]), - ("+", vec![Symbol("+")]), - ("-", vec![Symbol("-")]), - ("*", vec![Symbol("*")]), - ("/", vec![Symbol("/")]), - ("=", vec![Symbol("=")]), - ("<=", vec![Symbol("<=")]), - (">=", vec![Symbol(">=")]), - ("a1b2", vec![Symbol("a1b2")]), - ("x", vec![Symbol("x")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_ambiguous() { - let cases = vec![ - ("-x", vec![Symbol("-x")]), - ("+foo", vec![Symbol("+foo")]), - ("...", vec![Symbol("...")]), - (".foo", vec![Symbol(".foo")]), - ("-.", vec![Symbol("-.")]), - ("+.", vec![Symbol("+.")]), - (".", vec![Symbol(".")]), - ("+.a", vec![Symbol("+.a")]), - ("-.a", vec![Symbol("-.a")]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_no_separators() { - let cases = vec![ - ("(foo)", vec![LeftPar, Symbol("foo"), RightPar]), - ("(1)", vec![LeftPar, Number("1"), RightPar]), - ("(a)b", vec![LeftPar, Symbol("a"), RightPar, Symbol("b")]), - ("'(a)", vec![Quote, LeftPar, Symbol("a"), RightPar]), - (r#"("s")"#, vec![LeftPar, String("s"), RightPar]), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_whitespace_separators() { - let cases = vec![ - ( - "(\n foo\n bar\n)", - vec![LeftPar, Symbol("foo"), Symbol("bar"), RightPar], - ), - ( - "(\tfoo\tbar\t)", - vec![LeftPar, Symbol("foo"), Symbol("bar"), RightPar], - ), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_expressions() { - let cases = vec![ - ( - "(define x 42)", - vec![ - LeftPar, - Symbol("define"), - Symbol("x"), - Number("42"), - RightPar, - ], - ), - ( - "(+ 1 2)", - vec![LeftPar, Symbol("+"), Number("1"), Number("2"), RightPar], - ), - ( - "(if (= x 0) 'zero 'nonzero)", - vec![ - LeftPar, - Symbol("if"), - LeftPar, - Symbol("="), - Symbol("x"), - Number("0"), - RightPar, - Quote, - Symbol("zero"), - Quote, - Symbol("nonzero"), - RightPar, - ], - ), - ( - r#"(print "hello, world")"#, - vec![LeftPar, Symbol("print"), String("hello, world"), RightPar], - ), - ( - "(lambda (x) (* x x))", - vec![ - LeftPar, - Symbol("lambda"), - LeftPar, - Symbol("x"), - RightPar, - LeftPar, - Symbol("*"), - Symbol("x"), - Symbol("x"), - RightPar, - RightPar, - ], - ), - ( - "'(1 2 3)", - vec![ - Quote, - LeftPar, - Number("1"), - Number("2"), - Number("3"), - RightPar, - ], - ), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -#[test] -fn test_comments() { - let cases = vec![ - (";", vec![]), - (";\n", vec![]), - ("; comment", vec![]), - ("; comment\n", vec![]), - ("; comment\n42", vec![Number("42")]), - ("42 ; comment", vec![Number("42")]), - ("42; comment", vec![Number("42")]), - ( - "(+ 1 2) ; calc\n(- 3 4)", - vec![ - LeftPar, - Symbol("+"), - Number("1"), - Number("2"), - RightPar, - LeftPar, - Symbol("-"), - Number("3"), - Number("4"), - RightPar, - ], - ), - ]; - for (code, tokens) in cases { - assert_eq!(tokenize(code), tokens); - } -} - -fn spans(input: &str) -> Vec<(Pos, Pos)> { - Lexer::new(input) - .map(|s| (s.span.start, s.span.end)) - .collect() -} - -#[test] -fn test_span_single_char() { - let s = spans("("); - assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 1, 1))]); -} - -#[test] -fn test_span_after_leading_whitespace() { - let s = spans(" ("); - assert_eq!(s, vec![(Pos::new(1, 3, 3), Pos::new(1, 4, 4))]); -} - -#[test] -fn test_span_after_newline() { - let s = spans("\n("); - assert_eq!(s, vec![(Pos::new(2, 0, 1), Pos::new(2, 1, 2))]); -} - -#[test] -fn test_span_multi_char() { - let s = spans("foo"); - assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 3, 3))]); -} - -#[test] -fn test_span_string() { - let s = spans(r#""hi""#); - assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 4, 4))]); -} - -#[test] -fn test_span_sequence() { - // (foo 42) - // 012345678 - let s = spans("(foo 42)"); - assert_eq!( - s, - vec![ - (Pos::new(1, 0, 0), Pos::new(1, 1, 1)), // ( - (Pos::new(1, 1, 1), Pos::new(1, 4, 4)), // foo - (Pos::new(1, 5, 5), Pos::new(1, 7, 7)), // 42 - (Pos::new(1, 7, 7), Pos::new(1, 8, 8)), // ) - ], - ); -} - -#[test] -fn test_span_lines() { - let s = spans("foo\nbar"); - assert_eq!( - s, - vec![ - (Pos::new(1, 0, 0), Pos::new(1, 3, 3)), - (Pos::new(2, 0, 4), Pos::new(2, 3, 7)), - ], - ); -} - -#[test] -fn test_span_after_comment() { - // ; cm\nfoo - // 01234 5678 - let s = spans("; cm\nfoo"); - assert_eq!(s, vec![(Pos::new(2, 0, 5), Pos::new(2, 3, 8))]); -} - -#[test] -fn test_span_after_quote() { - // 'hello - // 0123456 - let s = spans("'hello"); - assert_eq!( - s, - vec![ - (Pos::new(1, 0, 0), Pos::new(1, 1, 1)), - (Pos::new(1, 1, 1), Pos::new(1, 6, 6)) - ] - ); -} -- cgit v1.3