diff options
| author | Tolmachev Igor <me@igorek.dev> | 2026-05-08 15:25:55 +0300 |
|---|---|---|
| committer | Tolmachev Igor <me@igorek.dev> | 2026-05-08 15:25:55 +0300 |
| commit | 58b937521f3e459089c0d475551bf9a49f930657 (patch) | |
| tree | 3c7e33c914445e3b6448ffc287cf70038c5e080c | |
| parent | 558c5dcaf7bcc32cfe5672c4113962e3bcd19188 (diff) | |
| download | crisp-58b937521f3e459089c0d475551bf9a49f930657.tar.gz crisp-58b937521f3e459089c0d475551bf9a49f930657.zip | |
Fold unclosed string error into Token variant
UnclosedString was the only error variant, making lexer::Error redundant. Also removes the Result
wrapper from the Iterator impl's Item type, so the lexer now yields Span<Token> directly.
| -rw-r--r-- | compiler/src/lexer/error.rs | 18 | ||||
| -rw-r--r-- | compiler/src/lexer/mod.rs | 31 | ||||
| -rw-r--r-- | compiler/src/lexer/tests.rs | 49 |
3 files changed, 36 insertions, 62 deletions
diff --git a/compiler/src/lexer/error.rs b/compiler/src/lexer/error.rs deleted file mode 100644 index f251167..0000000 --- a/compiler/src/lexer/error.rs +++ /dev/null | |||
| @@ -1,18 +0,0 @@ | |||
| 1 | use std::{error, fmt, result}; | ||
| 2 | |||
| 3 | pub type Result<T> = result::Result<T, Error>; | ||
| 4 | |||
| 5 | #[derive(Debug, PartialEq, Eq)] | ||
| 6 | pub enum Error { | ||
| 7 | UnclosedString, | ||
| 8 | } | ||
| 9 | |||
| 10 | impl fmt::Display for Error { | ||
| 11 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
| 12 | match self { | ||
| 13 | Error::UnclosedString => write!(f, "unclosed string literal"), | ||
| 14 | } | ||
| 15 | } | ||
| 16 | } | ||
| 17 | |||
| 18 | impl error::Error for Error {} | ||
diff --git a/compiler/src/lexer/mod.rs b/compiler/src/lexer/mod.rs index 2ef4922..ff7d51d 100644 --- a/compiler/src/lexer/mod.rs +++ b/compiler/src/lexer/mod.rs | |||
| @@ -1,7 +1,4 @@ | |||
| 1 | mod error; | ||
| 2 | |||
| 3 | use crate::span::{Pos, Span}; | 1 | use crate::span::{Pos, Span}; |
| 4 | pub use error::{Error, Result}; | ||
| 5 | 2 | ||
| 6 | #[cfg(test)] | 3 | #[cfg(test)] |
| 7 | mod tests; | 4 | mod tests; |
| @@ -17,6 +14,7 @@ pub enum Token<'a> { | |||
| 17 | Quote, | 14 | Quote, |
| 18 | Number(&'a str), | 15 | Number(&'a str), |
| 19 | String(&'a str), | 16 | String(&'a str), |
| 17 | UnclosedString(&'a str), | ||
| 20 | Symbol(&'a str), | 18 | Symbol(&'a str), |
| 21 | } | 19 | } |
| 22 | 20 | ||
| @@ -82,7 +80,7 @@ impl<'a> Lexer<'a> { | |||
| 82 | self.next_while(|ch| !is_terminator(ch)) | 80 | self.next_while(|ch| !is_terminator(ch)) |
| 83 | } | 81 | } |
| 84 | 82 | ||
| 85 | fn next_string(&mut self) -> Result<&'a str> { | 83 | fn next_string(&mut self) -> Result<&'a str, &'a str> { |
| 86 | debug_assert_eq!(self.peek(), Some('"')); | 84 | debug_assert_eq!(self.peek(), Some('"')); |
| 87 | self.consume(); | 85 | self.consume(); |
| 88 | 86 | ||
| @@ -95,7 +93,11 @@ impl<'a> Lexer<'a> { | |||
| 95 | self.consume(); | 93 | self.consume(); |
| 96 | return Ok(string); | 94 | return Ok(string); |
| 97 | } | 95 | } |
| 98 | '\n' => return Err(Error::UnclosedString), | 96 | '\n' => { |
| 97 | let string = &self.input[start..self.cursor]; | ||
| 98 | self.consume(); | ||
| 99 | return Err(string); | ||
| 100 | } | ||
| 99 | '\\' => { | 101 | '\\' => { |
| 100 | self.consume(); | 102 | self.consume(); |
| 101 | self.consume(); | 103 | self.consume(); |
| @@ -106,12 +108,12 @@ impl<'a> Lexer<'a> { | |||
| 106 | } | 108 | } |
| 107 | } | 109 | } |
| 108 | 110 | ||
| 109 | Err(Error::UnclosedString) | 111 | Err(&self.input[start..self.cursor]) |
| 110 | } | 112 | } |
| 111 | } | 113 | } |
| 112 | 114 | ||
| 113 | impl<'a> Iterator for Lexer<'a> { | 115 | impl<'a> Iterator for Lexer<'a> { |
| 114 | type Item = Span<Result<Token<'a>>>; | 116 | type Item = Span<Token<'a>>; |
| 115 | 117 | ||
| 116 | fn next(&mut self) -> Option<Self::Item> { | 118 | fn next(&mut self) -> Option<Self::Item> { |
| 117 | loop { | 119 | loop { |
| @@ -131,15 +133,15 @@ impl<'a> Iterator for Lexer<'a> { | |||
| 131 | let token = match self.peek()? { | 133 | let token = match self.peek()? { |
| 132 | '(' => { | 134 | '(' => { |
| 133 | self.consume(); | 135 | self.consume(); |
| 134 | Ok(Token::LeftPar) | 136 | Token::LeftPar |
| 135 | } | 137 | } |
| 136 | ')' => { | 138 | ')' => { |
| 137 | self.consume(); | 139 | self.consume(); |
| 138 | Ok(Token::RightPar) | 140 | Token::RightPar |
| 139 | } | 141 | } |
| 140 | '\'' => { | 142 | '\'' => { |
| 141 | self.consume(); | 143 | self.consume(); |
| 142 | Ok(Token::Quote) | 144 | Token::Quote |
| 143 | } | 145 | } |
| 144 | 146 | ||
| 145 | // Number | 147 | // Number |
| @@ -147,14 +149,17 @@ impl<'a> Iterator for Lexer<'a> { | |||
| 147 | || matches!(ch, '+' | '-' | '.') | 149 | || matches!(ch, '+' | '-' | '.') |
| 148 | && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) => | 150 | && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) => |
| 149 | { | 151 | { |
| 150 | Ok(Token::Number(self.next_atom())) | 152 | Token::Number(self.next_atom()) |
| 151 | } | 153 | } |
| 152 | 154 | ||
| 153 | // String | 155 | // String |
| 154 | '"' => self.next_string().map(Token::String), | 156 | '"' => match self.next_string() { |
| 157 | Ok(string) => Token::String(string), | ||
| 158 | Err(string) => Token::UnclosedString(string), | ||
| 159 | }, | ||
| 155 | 160 | ||
| 156 | // Symbol | 161 | // Symbol |
| 157 | _ => Ok(Token::Symbol(self.next_atom())), | 162 | _ => Token::Symbol(self.next_atom()), |
| 158 | }; | 163 | }; |
| 159 | 164 | ||
| 160 | let end = Pos::new(self.line, self.column, self.cursor); | 165 | let end = Pos::new(self.line, self.column, self.cursor); |
diff --git a/compiler/src/lexer/tests.rs b/compiler/src/lexer/tests.rs index 65dd2f2..30be85a 100644 --- a/compiler/src/lexer/tests.rs +++ b/compiler/src/lexer/tests.rs | |||
| @@ -4,7 +4,7 @@ use super::Token::*; | |||
| 4 | use super::*; | 4 | use super::*; |
| 5 | 5 | ||
| 6 | fn tokenize<'a>(input: &'a str) -> Vec<Token<'a>> { | 6 | fn tokenize<'a>(input: &'a str) -> Vec<Token<'a>> { |
| 7 | Lexer::new(input).map(|s| s.into_inner().unwrap()).collect() | 7 | Lexer::new(input).map(|s| s.into_inner()).collect() |
| 8 | } | 8 | } |
| 9 | 9 | ||
| 10 | #[test] | 10 | #[test] |
| @@ -103,6 +103,22 @@ fn test_string_escapes() { | |||
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | #[test] | 105 | #[test] |
| 106 | fn test_unclosed_strings() { | ||
| 107 | let cases = vec![ | ||
| 108 | (r#""abc"#, vec![UnclosedString("abc")]), | ||
| 109 | (r#""abc\""#, vec![UnclosedString(r#"abc\""#)]), | ||
| 110 | ("\"abc\n", vec![UnclosedString("abc")]), | ||
| 111 | ("\"abc\\\ndef", vec![UnclosedString("abc\\\ndef")]), | ||
| 112 | ("\"abc\n\"def\"", vec![UnclosedString("abc"), String("def")]), | ||
| 113 | (r#"""#, vec![UnclosedString("")]), | ||
| 114 | ("\"\n\"", vec![UnclosedString(""), UnclosedString("")]), | ||
| 115 | ]; | ||
| 116 | for (code, tokens) in cases { | ||
| 117 | assert_eq!(tokenize(code), tokens); | ||
| 118 | } | ||
| 119 | } | ||
| 120 | |||
| 121 | #[test] | ||
| 106 | fn test_symbols() { | 122 | fn test_symbols() { |
| 107 | let cases = vec![ | 123 | let cases = vec![ |
| 108 | ("foo", vec![Symbol("foo")]), | 124 | ("foo", vec![Symbol("foo")]), |
| @@ -272,35 +288,6 @@ fn test_comments() { | |||
| 272 | } | 288 | } |
| 273 | } | 289 | } |
| 274 | 290 | ||
| 275 | fn first_error(input: &str) -> Error { | ||
| 276 | Lexer::new(input) | ||
| 277 | .find_map(|s| s.into_inner().err()) | ||
| 278 | .expect("error expected") | ||
| 279 | } | ||
| 280 | |||
| 281 | #[test] | ||
| 282 | fn test_unclosed_string_at_eof() { | ||
| 283 | assert_eq!(first_error(r#""abc"#), Error::UnclosedString); | ||
| 284 | assert_eq!(first_error(r#"""#), Error::UnclosedString); | ||
| 285 | } | ||
| 286 | |||
| 287 | #[test] | ||
| 288 | fn test_unclosed_string_with_trailing_escape() { | ||
| 289 | assert_eq!(first_error("\"abc\\"), Error::UnclosedString); | ||
| 290 | } | ||
| 291 | |||
| 292 | #[test] | ||
| 293 | fn test_unclosed_string_with_newline() { | ||
| 294 | assert_eq!(first_error("\"abc\ndef\""), Error::UnclosedString); | ||
| 295 | } | ||
| 296 | |||
| 297 | #[test] | ||
| 298 | fn test_lexer_stops_after_string_error() { | ||
| 299 | let mut lex = Lexer::new(r#""abc"#); | ||
| 300 | assert!(lex.next().unwrap().into_inner().is_err()); | ||
| 301 | assert!(lex.next().is_none()); | ||
| 302 | } | ||
| 303 | |||
| 304 | fn spans(input: &str) -> Vec<(Pos, Pos)> { | 291 | fn spans(input: &str) -> Vec<(Pos, Pos)> { |
| 305 | Lexer::new(input).map(|s| (s.start(), s.end())).collect() | 292 | Lexer::new(input).map(|s| (s.start(), s.end())).collect() |
| 306 | } | 293 | } |
| @@ -324,7 +311,7 @@ fn test_span_after_newline() { | |||
| 324 | } | 311 | } |
| 325 | 312 | ||
| 326 | #[test] | 313 | #[test] |
| 327 | fn test_span_multi_char_() { | 314 | fn test_span_multi_char() { |
| 328 | let s = spans("foo"); | 315 | let s = spans("foo"); |
| 329 | assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 3, 3))]); | 316 | assert_eq!(s, vec![(Pos::new(1, 0, 0), Pos::new(1, 3, 3))]); |
| 330 | } | 317 | } |
