From 160b64427d79290a59ac48c9babca064232d8dfd Mon Sep 17 00:00:00 2001 From: Tolmachev Igor Date: Sat, 9 May 2026 20:47:04 +0300 Subject: Make project structure more consistent --- compiler/src/lex/lexer.rs | 161 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 compiler/src/lex/lexer.rs (limited to 'compiler/src/lex/lexer.rs') diff --git a/compiler/src/lex/lexer.rs b/compiler/src/lex/lexer.rs new file mode 100644 index 0000000..801d382 --- /dev/null +++ b/compiler/src/lex/lexer.rs @@ -0,0 +1,161 @@ +use crate::{ + lex::Token, + span::{Pos, Span, Spanned}, +}; + +fn is_terminator(ch: char) -> bool { + ch.is_whitespace() || matches!(ch, '(' | ')' | '\'' | '"' | ';') +} + +pub struct Lexer<'a> { + input: &'a str, + cursor: usize, + + line: usize, + column: usize, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Self { + Self { + input, + cursor: 0, + + line: 1, + column: 0, + } + } + + fn rest(&self) -> &str { + &self.input[self.cursor..] + } + + fn peek(&self) -> Option { + self.rest().chars().next() + } + + fn peek_nth(&self, n: usize) -> Option { + self.rest().chars().nth(n) + } + + fn consume(&mut self) -> Option { + let ch = self.peek()?; + + self.cursor += ch.len_utf8(); + if ch == '\n' { + self.line += 1; + self.column = 0; + } else { + self.column += 1; + } + + Some(ch) + } + + fn next_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str { + let start = self.cursor; + + while let Some(ch) = self.peek() { + if !predicate(ch) { + break; + } + self.consume(); + } + + &self.input[start..self.cursor] + } + + fn next_atom(&mut self) -> &'a str { + self.next_while(|ch| !is_terminator(ch)) + } + + fn next_string(&mut self) -> Result<&'a str, &'a str> { + debug_assert_eq!(self.peek(), Some('"')); + self.consume(); + + let start = self.cursor; + + while let Some(ch) = self.peek() { + match ch { + '"' => { + let string = &self.input[start..self.cursor]; + self.consume(); + return Ok(string); + } + '\n' => { + let string = &self.input[start..self.cursor]; + self.consume(); + return Err(string); + } + '\\' => { + self.consume(); + self.consume(); + } + _ => { + self.consume(); + } + } + } + + Err(&self.input[start..self.cursor]) + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Spanned>; + + fn next(&mut self) -> Option { + loop { + match self.peek()? { + ch if ch.is_whitespace() => { + self.next_while(char::is_whitespace); + } + ';' => { + self.next_while(|ch| ch != '\n'); + } + _ => break, + } + } + + let start = Pos::new(self.line, self.column, self.cursor); + + let token = match self.peek()? { + '(' => { + self.consume(); + Token::LeftPar + } + ')' => { + self.consume(); + Token::RightPar + } + '\'' => { + self.consume(); + Token::Quote + } + + // Number + ch if ch.is_ascii_digit() + || ch == '.' && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) + || matches!(ch, '+' | '-') + && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) + || matches!(ch, '+' | '-') + && self.peek_nth(1).is_some_and(|ch| ch == '.') + && self.peek_nth(2).is_some_and(|ch| ch.is_ascii_digit()) => + { + Token::Number(self.next_atom()) + } + + // String + '"' => match self.next_string() { + Ok(string) => Token::String(string), + Err(string) => Token::UnclosedString(string), + }, + + // Symbol + _ => Token::Symbol(self.next_atom()), + }; + + let end = Pos::new(self.line, self.column, self.cursor); + Some(Spanned::new(token, Span::new(start, end))) + } +} -- cgit v1.3