From 6c5c627dd441b0e7ac52cfd05e1923584dd213ae Mon Sep 17 00:00:00 2001 From: Tolmachev Igor Date: Sat, 9 May 2026 18:39:11 +0300 Subject: Add parser tests --- compiler/src/ast/tests.rs | 747 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 747 insertions(+) create mode 100644 compiler/src/ast/tests.rs (limited to 'compiler/src/ast/tests.rs') diff --git a/compiler/src/ast/tests.rs b/compiler/src/ast/tests.rs new file mode 100644 index 0000000..708e788 --- /dev/null +++ b/compiler/src/ast/tests.rs @@ -0,0 +1,747 @@ +use std::f64; +use std::fmt::Debug; +use std::iter::repeat_n; +use std::rc::Rc; + +use self::E::*; +use super::{Error, Parser, parser::MAX_DEPTH}; +use crate::ast::{Atom, Expr}; +use crate::lexer::Token; +use crate::lexer::Token::*; +use crate::span::{Pos, Span, Spanned}; + +#[derive(Debug, PartialEq)] +enum E { + Flt(f64), + Int(i64), + Str(&'static str), + Sym(&'static str), + Bool(bool), + Nil, + List(Vec), +} + +impl From for E { + fn from(expr: Expr) -> Self { + match expr { + Expr::Atom(atom) => match atom { + Atom::Float(f) => Flt(f), + Atom::Integer(i) => Int(i), + Atom::String(s) => Str(Box::leak(s.into())), + Atom::Symbol(s) => Sym(Box::leak(s.into())), + Atom::Bool(b) => Bool(b), + Atom::Nil => Nil, + }, + Expr::List(l) => List(l.into_iter().map(|e| e.inner.into()).collect()), + } + } +} + +fn dummy_tokens(tokens: Vec>) -> impl Iterator>> { + let span = Span::new(Pos::new(0, 0, 0), Pos::new(0, 0, 0)); + tokens.into_iter().map(move |t| Spanned::new(t, span)) +} + +fn parse(tokens: Vec>) -> Vec { + Parser::new(dummy_tokens(tokens)) + .parse() + .unwrap() + .into_inner() + .into_iter() + .map(|e| e.inner.into()) + .collect() +} + +#[test] +fn test_empty() { + assert_eq!(parse(vec![]), vec![], "input: empty"); +} + +#[test] +fn test_integers() { + let cases = vec![ + (vec![Number("0")], vec![Int(0)]), + (vec![Number("42")], vec![Int(42)]), + (vec![Number("-7")], vec![Int(-7)]), + (vec![Number("+5")], vec![Int(5)]), + (vec![Number("9223372036854775807")], vec![Int(i64::MAX)]), + (vec![Number("-9223372036854775808")], vec![Int(i64::MIN)]), + ]; + for (tokens, ast) in cases { + assert_eq!(parse(tokens.clone()), ast, "input: {tokens:?}"); + } +} + +#[test] +fn test_floats() { + let cases = vec![ + (vec![Number("2.71")], vec![Flt(2.71)]), + (vec![Number("-2.5")], vec![Flt(-2.5)]), + (vec![Number("+0.0")], vec![Flt(0.0)]), + (vec![Number(".5")], vec![Flt(0.5)]), + (vec![Number("-.5")], vec![Flt(-0.5)]), + (vec![Number("+.5")], vec![Flt(0.5)]), + (vec![Number("1e10")], vec![Flt(1e10)]), + (vec![Number("1E10")], vec![Flt(1e10)]), + (vec![Number("1.5e-3")], vec![Flt(1.5e-3)]), + (vec![Number("-1.5E+3")], vec![Flt(-1.5e3)]), + (vec![Number("inf")], vec![Flt(f64::INFINITY)]), + (vec![Number("-inf")], vec![Flt(f64::NEG_INFINITY)]), + (vec![Number("1e9999")], vec![Flt(f64::INFINITY)]), + (vec![Number("-1e9999")], vec![Flt(f64::NEG_INFINITY)]), + ]; + for (tokens, ast) in cases { + assert_eq!(parse(tokens.clone()), ast, "input: {tokens:?}"); + } +} + +#[test] +fn test_float_nan() { + let tokens = vec![Number("nan")]; + let expr = &parse(tokens.clone())[0]; + assert!( + matches!(expr, Flt(f) if f.is_nan()), + "input: {tokens:?}, got: {expr:?}" + ); +} + +#[test] +fn test_keywords() { + let cases = vec![ + (vec![Symbol("true")], vec![Bool(true)]), + (vec![Symbol("false")], vec![Bool(false)]), + (vec![Symbol("nil")], vec![Nil]), + (vec![Symbol("inf")], vec![Flt(f64::INFINITY)]), + (vec![Symbol("+inf")], vec![Flt(f64::INFINITY)]), + (vec![Symbol("-inf")], vec![Flt(f64::NEG_INFINITY)]), + (vec![LeftPar, RightPar], vec![Nil]), + ]; + for (tokens, ast) in cases { + assert_eq!(parse(tokens.clone()), ast, "input: {tokens:?}"); + } +} + +#[test] +fn test_keyword_nan() { + let tokens = vec![Symbol("nan")]; + let expr = &parse(tokens.clone())[0]; + assert!( + matches!(expr, Flt(f) if f.is_nan()), + "input: {tokens:?}, got: {expr:?}" + ); +} + +#[test] +fn test_keywords_case_sensitive() { + let cases = vec![ + (vec![Symbol("True")], vec![Sym("True")]), + (vec![Symbol("FALSE")], vec![Sym("FALSE")]), + (vec![Symbol("NIL")], vec![Sym("NIL")]), + (vec![Symbol("Nil")], vec![Sym("Nil")]), + (vec![Symbol("Inf")], vec![Sym("Inf")]), + (vec![Symbol("INF")], vec![Sym("INF")]), + (vec![Symbol("NaN")], vec![Sym("NaN")]), + (vec![Symbol("NAN")], vec![Sym("NAN")]), + ]; + for (tokens, ast) in cases { + assert_eq!(parse(tokens.clone()), ast, "input: {tokens:?}"); + } +} + +#[test] +fn test_symbols() { + let cases = vec![ + (vec![Symbol("foo")], vec![Sym("foo")]), + (vec![Symbol("foo-bar")], vec![Sym("foo-bar")]), + (vec![Symbol("set!")], vec![Sym("set!")]), + (vec![Symbol("empty?")], vec![Sym("empty?")]), + (vec![Symbol("+")], vec![Sym("+")]), + (vec![Symbol("-")], vec![Sym("-")]), + (vec![Symbol("<=")], vec![Sym("<=")]), + (vec![Symbol(",")], vec![Sym(",")]), + ]; + for (tokens, ast) in cases { + assert_eq!(parse(tokens.clone()), ast, "input: {tokens:?}"); + } +} + +#[test] +fn test_strings() { + let cases = vec![ + (vec![String("")], vec![Str("")]), + (vec![String("hello")], vec![Str("hello")]), + (vec![String("(not a list)")], vec![Str("(not a list)")]), + ( + vec![String("; not a comment")], + vec![Str("; not a comment")], + ), + ]; + for (tokens, ast) in cases { + assert_eq!(parse(tokens.clone()), ast, "input: {tokens:?}"); + } +} + +#[test] +fn test_string_escapes() { + let cases = vec![ + (vec![String(r"a\nb")], vec![Str("a\nb")]), + (vec![String(r#"a\"b"#)], vec![Str("a\"b")]), + (vec![String("a\\\nb")], vec![Str("ab")]), + ]; + for (tokens, ast) in cases { + assert_eq!(parse(tokens.clone()), ast, "input: {tokens:?}"); + } +} + +#[test] +fn test_lists() { + let cases = vec![ + // (1) + ( + vec![LeftPar, Number("1"), RightPar], + vec![List(vec![Int(1)])], + ), + // (1 2 3) + ( + vec![LeftPar, Number("1"), Number("2"), Number("3"), RightPar], + vec![List(vec![Int(1), Int(2), Int(3)])], + ), + // (()) -> (nil) + ( + vec![LeftPar, LeftPar, RightPar, RightPar], + vec![List(vec![Nil])], + ), + // (a (b c) d) + ( + vec![ + LeftPar, + Symbol("a"), + LeftPar, + Symbol("b"), + Symbol("c"), + RightPar, + Symbol("d"), + RightPar, + ], + vec![List(vec![ + Sym("a"), + List(vec![Sym("b"), Sym("c")]), + Sym("d"), + ])], + ), + // (define x 42) + ( + vec![ + LeftPar, + Symbol("define"), + Symbol("x"), + Number("42"), + RightPar, + ], + vec![List(vec![Sym("define"), Sym("x"), Int(42)])], + ), + // (1 2.5 "s" foo true nil) + ( + vec![ + LeftPar, + Number("1"), + Number("2.5"), + String("s"), + Symbol("foo"), + Symbol("true"), + Symbol("nil"), + RightPar, + ], + vec![List(vec![ + Int(1), + Flt(2.5), + Str("s"), + Sym("foo"), + Bool(true), + Nil, + ])], + ), + ]; + for (tokens, ast) in cases { + assert_eq!(parse(tokens.clone()), ast, "input: {tokens:?}"); + } +} + +fn quote(expr: E) -> E { + List(vec![Sym("quote"), expr]) +} + +#[test] +fn test_quote() { + let cases = vec![ + // 'x -> (quote x) + (vec![Quote, Symbol("x")], vec![quote(Sym("x"))]), + // '42 -> (quote 42) + (vec![Quote, Number("42")], vec![quote(Int(42))]), + // '() -> (quote nil) + (vec![Quote, LeftPar, RightPar], vec![quote(Nil)]), + // ''x -> (quote (quote x)) + ( + vec![Quote, Quote, Symbol("x")], + vec![quote(quote(Sym("x")))], + ), + // '''x -> (quote (quote (quote x))) + ( + vec![Quote, Quote, Quote, Symbol("x")], + vec![quote(quote(quote(Sym("x"))))], + ), + // '(1 2) -> (quote (1 2)) + ( + vec![Quote, LeftPar, Number("1"), Number("2"), RightPar], + vec![quote(List(vec![Int(1), Int(2)]))], + ), + // (list 'a 'b) -> (list (quote a) (quote b)) + ( + vec![ + LeftPar, + Symbol("list"), + Quote, + Symbol("a"), + Quote, + Symbol("b"), + RightPar, + ], + vec![List(vec![Sym("list"), quote(Sym("a")), quote(Sym("b"))])], + ), + ]; + for (tokens, expected) in cases { + assert_eq!(parse(tokens.clone()), expected, "input: {tokens:?}"); + } +} + +#[test] +fn test_top_level() { + let cases = vec![ + ( + vec![Number("1"), Number("2"), Number("3")], + vec![Int(1), Int(2), Int(3)], + ), + ( + vec![ + LeftPar, + Symbol("a"), + RightPar, + LeftPar, + Symbol("b"), + RightPar, + ], + vec![List(vec![Sym("a")]), List(vec![Sym("b")])], + ), + ]; + for (tokens, expected) in cases { + assert_eq!(parse(tokens.clone()), expected, "input: {tokens:?}"); + } +} + +fn parse_err(tokens: Vec>) -> Error { + Parser::new(dummy_tokens(tokens)).parse().unwrap_err().inner +} + +#[test] +fn test_unexpected_right_par() { + let cases = vec![ + vec![RightPar], + vec![Number("1"), RightPar], + vec![LeftPar, Symbol("a"), RightPar, RightPar], + vec![Quote, RightPar], + vec![LeftPar, Quote, Quote, RightPar], + ]; + for tokens in cases { + assert_eq!( + parse_err(tokens.clone()), + Error::UnexpectedRightPar, + "input: {tokens:?}" + ); + } +} + +#[test] +fn test_unclosed_left_par() { + let cases = vec![ + vec![LeftPar], + vec![LeftPar, Number("1")], + vec![LeftPar, LeftPar, Symbol("a"), RightPar], + ]; + for tokens in cases { + assert_eq!( + parse_err(tokens.clone()), + Error::UnclosedLeftPar, + "input: {tokens:?}" + ); + } +} + +#[test] +fn test_unexpected_eof_after_quote() { + let cases = vec![ + vec![Quote], + vec![Quote, Quote], + vec![Symbol("a"), Quote], + vec![LeftPar, Quote], + vec![Quote, LeftPar, Quote], + ]; + for tokens in cases { + assert_eq!( + parse_err(tokens.clone()), + Error::UnexpectedEof, + "input: {tokens:?}" + ); + } +} + +#[test] +fn test_invalid_integer() { + let cases = vec![ + "99999999999999999999", + "-99999999999999999999", + "+", + "-", + "", + "1_000", + "0x10", + "1 2", + "++1", + "--1", + "1,5", + "10,40", + ]; + for number in cases { + let tokens = vec![Number(number)]; + let error = number.parse::().unwrap_err(); + assert_eq!( + parse_err(tokens.clone()), + Error::InvalidIntegerLiteral(error), + "input: {tokens:?}", + ); + } +} + +#[test] +fn test_invalid_float() { + let cases = vec![ + "12somE0txt", + "12som.0txt", + "12.3txt", + "1.2.3", + "1.2.3.4", + ".", + "+.", + "-.", + "1e", + "1e+", + "1e-", + "1.e", + ".e5", + "1ee5", + "1e1.5", + ]; + for number in cases { + let tokens = vec![Number(number)]; + let error = number.parse::().unwrap_err(); + assert_eq!( + parse_err(tokens.clone()), + Error::InvalidFloatLiteral(error), + "input: {tokens:?}", + ); + } +} + +#[test] +fn test_unclosed_string() { + let cases = vec![ + ( + vec![UnclosedString("oops")], + Error::UnclosedString("oops".into()), + ), + ( + vec![String("oops\\")], + Error::UnclosedString("oops\\".into()), + ), + ]; + for (tokens, error) in cases { + assert_eq!(parse_err(tokens.clone()), error, "input: {tokens:?}"); + } +} + +#[test] +fn test_unexpected_escape_char_propagates() { + let tokens = vec![String(r"a\q")]; + assert_eq!( + parse_err(tokens.clone()), + Error::UnexpectedEscapeChar('q'), + "input: {tokens:?}" + ); +} + +#[test] +fn test_recursion_limit() { + let mut tokens = Vec::with_capacity(MAX_DEPTH * 2); + tokens.extend(repeat_n(LeftPar, MAX_DEPTH)); + tokens.extend(repeat_n(RightPar, MAX_DEPTH)); + assert_eq!( + parse_err(tokens), + Error::RecursionLimit, + "input: {} LeftPar then {} RightPar", + MAX_DEPTH, + MAX_DEPTH, + ); +} + +fn p(line: usize, column: usize, offset: usize) -> Pos { + Pos::new(line, column, offset) +} + +fn sp(s: (usize, usize, usize), e: (usize, usize, usize)) -> Span { + Span::new(p(s.0, s.1, s.2), p(e.0, e.1, e.2)) +} + +fn tsp(t: Token<'static>, span: Span) -> Spanned> { + Spanned::new(t, span) +} + +fn parse_sp(tokens: Vec>>) -> Vec> { + Parser::new(tokens.into_iter()) + .parse() + .unwrap() + .into_inner() + .into_iter() + .collect() +} + +fn parse_sp_err(tokens: Vec>>) -> Spanned { + Parser::new(tokens.into_iter()).parse().unwrap_err() +} + +#[test] +fn test_span_atom() { + let s = sp((1, 0, 0), (1, 2, 2)); + let tokens = vec![tsp(Number("42"), s)]; + let prog = parse_sp(tokens.clone()); + + assert_eq!(prog[0].span, s, "input: {tokens:?}"); +} + +#[test] +fn test_span_list_covers_parens() { + // (foo) + let lp = sp((1, 0, 0), (1, 1, 1)); + let foo = sp((1, 1, 1), (1, 4, 4)); + let rp = sp((1, 4, 4), (1, 5, 5)); + let tokens = vec![tsp(LeftPar, lp), tsp(Symbol("foo"), foo), tsp(RightPar, rp)]; + let prog = parse_sp(tokens.clone()); + + assert_eq!(prog[0].span, sp((1, 0, 0), (1, 5, 5)), "input: {tokens:?}"); + + if let Expr::List(items) = &prog[0].inner { + assert_eq!(items[0].span, foo, "input: {tokens:?}"); + } else { + panic!("expected list, input: {tokens:?}"); + } +} + +#[test] +fn test_span_empty_list_covers_parens() { + // () -> Atom::Nil, span [0..2] + let lp = sp((1, 0, 0), (1, 1, 1)); + let rp = sp((1, 1, 1), (1, 2, 2)); + let tokens = vec![tsp(LeftPar, lp), tsp(RightPar, rp)]; + let prog = parse_sp(tokens.clone()); + + assert_eq!(prog[0].inner, Expr::Atom(Atom::Nil), "input: {tokens:?}"); + assert_eq!(prog[0].span, sp((1, 0, 0), (1, 2, 2)), "input: {tokens:?}"); +} + +#[test] +fn test_span_quote_atom() { + // 'x -> outer = [0..2]; Sym(quote) = [0..1]; x = [1..2] + let q = sp((1, 0, 0), (1, 1, 1)); + let x = sp((1, 1, 1), (1, 2, 2)); + let tokens = vec![tsp(Quote, q), tsp(Symbol("x"), x)]; + let prog = parse_sp(tokens.clone()); + + assert_eq!(prog[0].span, sp((1, 0, 0), (1, 2, 2)), "input: {tokens:?}"); + + if let Expr::List(items) = &prog[0].inner { + assert_eq!( + items[0].inner, + Expr::Atom(Atom::Symbol(Rc::from("quote"))), + "input: {tokens:?}", + ); + assert_eq!(items[0].span, q, "input: {tokens:?}"); + assert_eq!(items[1].span, x, "input: {tokens:?}"); + } else { + panic!("expected list, input: {tokens:?}"); + } +} + +#[test] +fn test_span_quote_of_list() { + // '(quote x) + // 0 1 2..7 8 9 10 + let q = sp((1, 0, 0), (1, 1, 1)); + let lp = sp((1, 1, 1), (1, 2, 2)); + let q_sym = sp((1, 2, 2), (1, 7, 7)); + let x_sym = sp((1, 8, 8), (1, 9, 9)); + let rp = sp((1, 9, 9), (1, 10, 10)); + + let tokens = vec![ + tsp(Quote, q), + tsp(LeftPar, lp), + tsp(Symbol("quote"), q_sym), + tsp(Symbol("x"), x_sym), + tsp(RightPar, rp), + ]; + let prog = parse_sp(tokens.clone()); + + let outer = &prog[0]; // (quote (quote x)) + assert_eq!(outer.span, sp((1, 0, 0), (1, 10, 10)), "input: {tokens:?}"); + + if let Expr::List(items) = &outer.inner { + assert_eq!(items[0].span, q, "input: {tokens:?}"); + assert_eq!( + items[1].span, + sp((1, 1, 1), (1, 10, 10)), + "input: {tokens:?}" + ); + } else { + panic!("expected list, input: {tokens:?}"); + } +} + +#[test] +fn test_error_span_unexpected_right_par() { + let s = sp((1, 5, 5), (1, 6, 6)); + let tokens = vec![tsp(RightPar, s)]; + let err = parse_sp_err(tokens.clone()); + + assert_eq!(err.inner, Error::UnexpectedRightPar, "input: {tokens:?}"); + assert_eq!(err.span, s, "input: {tokens:?}"); +} + +#[test] +fn test_error_span_invalid_integer() { + let s = sp((1, 0, 0), (1, 20, 20)); + let tokens = vec![tsp(Number("99999999999999999999"), s)]; + let err = parse_sp_err(tokens.clone()); + + assert_eq!(err.span, s, "input: {tokens:?}"); +} + +#[test] +fn test_error_span_invalid_float() { + let s = sp((1, 0, 0), (1, 5, 5)); + let tokens = vec![tsp(Number("1.2.3"), s)]; + let err = parse_sp_err(tokens.clone()); + + assert!( + matches!(err.inner, Error::InvalidFloatLiteral(_)), + "input: {tokens:?}, got: {:?}", + err.inner, + ); + assert_eq!(err.span, s, "input: {tokens:?}"); +} + +#[test] +fn test_error_span_unclosed_left_par() { + let lp = sp((1, 0, 0), (1, 1, 1)); + let tokens = vec![tsp(LeftPar, lp)]; + let err = parse_sp_err(tokens.clone()); + + assert_eq!(err.inner, Error::UnclosedLeftPar, "input: {tokens:?}"); + assert_eq!(err.span, lp, "input: {tokens:?}"); +} + +#[test] +fn test_error_span_unclosed_left_par_nested() { + let outer = sp((1, 0, 0), (1, 1, 1)); + let inner = sp((1, 2, 2), (1, 3, 3)); + let tokens = vec![tsp(LeftPar, outer), tsp(LeftPar, inner)]; + let err = parse_sp_err(tokens.clone()); + + assert_eq!(err.inner, Error::UnclosedLeftPar, "input: {tokens:?}"); + assert_eq!(err.span, inner, "input: {tokens:?}"); +} + +#[test] +fn test_error_span_unexpected_eof_after_quote() { + let q = sp((1, 0, 0), (1, 1, 1)); + let tokens = vec![tsp(Quote, q)]; + let err = parse_sp_err(tokens.clone()); + + assert_eq!(err.inner, Error::UnexpectedEof, "input: {tokens:?}"); + assert_eq!(err.span, q, "input: {tokens:?}"); +} + +#[test] +fn test_error_span_unclosed_string() { + let s = sp((1, 0, 0), (1, 7, 7)); + let tokens = vec![tsp(UnclosedString("oops"), s)]; + let err = parse_sp_err(tokens.clone()); + + assert_eq!( + err.inner, + Error::UnclosedString("oops".into()), + "input: {tokens:?}", + ); + assert_eq!(err.span, s, "input: {tokens:?}"); +} + +#[test] +fn test_error_span_unexpected_escape_char() { + let s = sp((1, 0, 0), (1, 5, 5)); + let tokens = vec![tsp(String(r"a\q"), s)]; + let err = parse_sp_err(tokens.clone()); + + assert_eq!( + err.inner, + Error::UnexpectedEscapeChar('q'), + "input: {tokens:?}" + ); + assert_eq!(err.span, s, "input: {tokens:?}"); +} + +#[test] +fn test_error_span_deep_nested() { + // (a (b (c BAD))) + let bad_span = sp((1, 9, 9), (1, 29, 29)); + let any = sp((1, 0, 0), (1, 1, 1)); + let tokens = vec![ + tsp(LeftPar, any), + tsp(Symbol("a"), any), + tsp(LeftPar, any), + tsp(Symbol("b"), any), + tsp(LeftPar, any), + tsp(Symbol("c"), any), + tsp(Number("99999999999999999999"), bad_span), + tsp(RightPar, any), + tsp(RightPar, any), + tsp(RightPar, any), + ]; + let err = parse_sp_err(tokens.clone()); + + assert!( + matches!(err.inner, Error::InvalidIntegerLiteral(_)), + "input: {tokens:?}, got: {:?}", + err.inner, + ); + assert_eq!(err.span, bad_span, "input: {tokens:?}"); +} + +#[test] +fn test_error_span_recursion_limit() { + let mut tokens = Vec::with_capacity(MAX_DEPTH); + for i in 0..MAX_DEPTH { + let s = sp((1, i, i), (1, i + 1, i + 1)); + tokens.push(tsp(LeftPar, s)); + } + let trigger = sp((1, MAX_DEPTH - 1, MAX_DEPTH - 1), (1, MAX_DEPTH, MAX_DEPTH)); + let err = parse_sp_err(tokens); + + assert_eq!(err.inner, Error::RecursionLimit, "MAX_DEPTH={MAX_DEPTH}"); + assert_eq!(err.span, trigger, "MAX_DEPTH={MAX_DEPTH}"); +} -- cgit v1.3