From 60ad7b994c2126346c19769a1a5f5c8f679a05ee Mon Sep 17 00:00:00 2001 From: Tolmachev Igor Date: Sat, 9 May 2026 15:25:49 +0300 Subject: Fix lexer processing [+-].\d as symbol instead of number Extended lookahead in the number branch to 3 chars. Added tests for "-.5", "+.5", "-.0" in test_numbers and for "-.", "+.", ".", "+.a", "-.a" in test_ambiguous. --- compiler/src/lexer/mod.rs | 8 ++++++-- compiler/src/lexer/tests.rs | 10 +++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/compiler/src/lexer/mod.rs b/compiler/src/lexer/mod.rs index 464d88e..f3c8b76 100644 --- a/compiler/src/lexer/mod.rs +++ b/compiler/src/lexer/mod.rs @@ -146,8 +146,12 @@ impl<'a> Iterator for Lexer<'a> { // Number ch if ch.is_ascii_digit() - || matches!(ch, '+' | '-' | '.') - && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) => + || ch == '.' && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) + || matches!(ch, '+' | '-') + && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) + || matches!(ch, '+' | '-') + && self.peek_nth(1).is_some_and(|ch| ch == '.') + && self.peek_nth(2).is_some_and(|ch| ch.is_ascii_digit()) => { Token::Number(self.next_atom()) } diff --git a/compiler/src/lexer/tests.rs b/compiler/src/lexer/tests.rs index 89575c7..6f96c65 100644 --- a/compiler/src/lexer/tests.rs +++ b/compiler/src/lexer/tests.rs @@ -1,7 +1,7 @@ use crate::span::Pos; use super::Token::*; -use super::*; +use super::{Lexer, Token}; fn tokenize<'a>(input: &'a str) -> Vec<Token<'a>> { Lexer::new(input).map(|s| s.inner).collect() @@ -67,6 +67,9 @@ fn test_numbers() { ("1e10", vec![Number("1e10")]), ("1.5e-3", vec![Number("1.5e-3")]), (".5", vec![Number(".5")]), + ("-.5", vec![Number("-.5")]), + ("+.5", vec![Number("+.5")]), + ("-.0", vec![Number("-.0")]), ]; for (code, tokens) in cases { assert_eq!(tokenize(code), tokens); @@ -149,6 +152,11 @@ fn test_ambiguous() { ("+foo", vec![Symbol("+foo")]), ("...", vec![Symbol("...")]), (".foo", vec![Symbol(".foo")]), + ("-.", 
vec![Symbol("-.")]), + ("+.", vec![Symbol("+.")]), + (".", vec![Symbol(".")]), + ("+.a", vec![Symbol("+.a")]), + ("-.a", vec![Symbol("-.a")]), ]; for (code, tokens) in cases { assert_eq!(tokenize(code), tokens); -- cgit v1.3