From 558c5dcaf7bcc32cfe5672c4113962e3bcd19188 Mon Sep 17 00:00:00 2001 From: Tolmachev Igor Date: Thu, 7 May 2026 17:46:44 +0300 Subject: Add lexer --- compiler/src/lexer/mod.rs | 163 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 compiler/src/lexer/mod.rs (limited to 'compiler/src/lexer/mod.rs') diff --git a/compiler/src/lexer/mod.rs b/compiler/src/lexer/mod.rs new file mode 100644 index 0000000..2ef4922 --- /dev/null +++ b/compiler/src/lexer/mod.rs @@ -0,0 +1,163 @@ +mod error; + +use crate::span::{Pos, Span}; +pub use error::{Error, Result}; + +#[cfg(test)] +mod tests; + +fn is_terminator(ch: char) -> bool { + ch.is_whitespace() || matches!(ch, '(' | ')' | '\'' | '"' | ';') +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Token<'a> { + LeftPar, + RightPar, + Quote, + Number(&'a str), + String(&'a str), + Symbol(&'a str), +} + +pub struct Lexer<'a> { + input: &'a str, + cursor: usize, + + line: usize, + column: usize, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Self { + Self { + input, + cursor: 0, + + line: 1, + column: 0, + } + } + + fn rest(&self) -> &str { + &self.input[self.cursor..] + } + + fn peek(&self) -> Option { + self.rest().chars().next() + } + + fn peek_nth(&self, n: usize) -> Option { + self.rest().chars().nth(n) + } + + fn consume(&mut self) -> Option { + let ch = self.peek()?; + + self.cursor += ch.len_utf8(); + if ch == '\n' { + self.line += 1; + self.column = 0; + } else { + self.column += 1; + } + + Some(ch) + } + + fn next_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str { + let start = self.cursor; + + while let Some(ch) = self.peek() { + if !predicate(ch) { + break; + } + self.consume(); + } + + &self.input[start..self.cursor] + } + + fn next_atom(&mut self) -> &'a str { + self.next_while(|ch| !is_terminator(ch)) + } + + fn next_string(&mut self) -> Result<&'a str> { + debug_assert_eq!(self.peek(), Some('"')); + self.consume(); + + let start = self.cursor; + + while let Some(ch) = self.peek() { + match ch { + '"' => { + let string = &self.input[start..self.cursor]; + self.consume(); + return Ok(string); + } + '\n' => return Err(Error::UnclosedString), + '\\' => { + self.consume(); + self.consume(); + } + _ => { + self.consume(); + } + } + } + + Err(Error::UnclosedString) + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Span>>; + + fn next(&mut self) -> Option { + loop { + match self.peek()? { + ch if ch.is_whitespace() => { + self.next_while(char::is_whitespace); + } + ';' => { + self.next_while(|ch| ch != '\n'); + } + _ => break, + } + } + + let start = Pos::new(self.line, self.column, self.cursor); + + let token = match self.peek()? { + '(' => { + self.consume(); + Ok(Token::LeftPar) + } + ')' => { + self.consume(); + Ok(Token::RightPar) + } + '\'' => { + self.consume(); + Ok(Token::Quote) + } + + // Number + ch if ch.is_ascii_digit() + || matches!(ch, '+' | '-' | '.') + && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) => + { + Ok(Token::Number(self.next_atom())) + } + + // String + '"' => self.next_string().map(Token::String), + + // Symbol + _ => Ok(Token::Symbol(self.next_atom())), + }; + + let end = Pos::new(self.line, self.column, self.cursor); + Some(Span::new(token, start, end)) + } +} -- cgit v1.3