aboutsummaryrefslogtreecommitdiff
path: root/compiler/src/lexer/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/src/lexer/mod.rs')
-rw-r--r--compiler/src/lexer/mod.rs163
1 files changed, 163 insertions, 0 deletions
diff --git a/compiler/src/lexer/mod.rs b/compiler/src/lexer/mod.rs
new file mode 100644
index 0000000..2ef4922
--- /dev/null
+++ b/compiler/src/lexer/mod.rs
@@ -0,0 +1,163 @@
1mod error;
2
3use crate::span::{Pos, Span};
4pub use error::{Error, Result};
5
6#[cfg(test)]
7mod tests;
8
/// Reports whether `ch` ends an atom (the raw text of a number or symbol).
///
/// An atom stops at any Unicode whitespace character or at one of the
/// delimiter characters `(`, `)`, `'`, `"` and `;`.
fn is_terminator(ch: char) -> bool {
    match ch {
        '(' | ')' | '\'' | '"' | ';' => true,
        other => other.is_whitespace(),
    }
}
12
/// A single lexical token.
///
/// Variants that carry text borrow it from the lexer's input, so a token is
/// cheap to copy and lives no longer than the source string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token<'a> {
    /// An opening parenthesis, `(`.
    LeftPar,
    /// A closing parenthesis, `)`.
    RightPar,
    /// A single quote character, `'`.
    Quote,
    /// The raw, unparsed text of a numeric atom.
    Number(&'a str),
    /// The text between the double quotes of a string literal; escape
    /// sequences are left exactly as written in the source.
    String(&'a str),
    /// The raw text of any atom that is not recognised as a number.
    Symbol(&'a str),
}
22
/// A lexer that walks over a borrowed source string.
///
/// `Debug` is derived so the lexer can be inspected in diagnostics and
/// embedded in other `#[derive(Debug)]` types. `Clone` is derived so a caller
/// can snapshot the current position and resume from it later; cloning only
/// copies a string slice and three integers.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// The complete source text being tokenized.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    cursor: usize,

    /// Current line number; starts at 1 and grows with every consumed `'\n'`.
    line: usize,
    /// Number of characters consumed since the start of the current line.
    column: usize,
}
30
31impl<'a> Lexer<'a> {
32 pub fn new(input: &'a str) -> Self {
33 Self {
34 input,
35 cursor: 0,
36
37 line: 1,
38 column: 0,
39 }
40 }
41
42 fn rest(&self) -> &str {
43 &self.input[self.cursor..]
44 }
45
46 fn peek(&self) -> Option<char> {
47 self.rest().chars().next()
48 }
49
50 fn peek_nth(&self, n: usize) -> Option<char> {
51 self.rest().chars().nth(n)
52 }
53
54 fn consume(&mut self) -> Option<char> {
55 let ch = self.peek()?;
56
57 self.cursor += ch.len_utf8();
58 if ch == '\n' {
59 self.line += 1;
60 self.column = 0;
61 } else {
62 self.column += 1;
63 }
64
65 Some(ch)
66 }
67
68 fn next_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
69 let start = self.cursor;
70
71 while let Some(ch) = self.peek() {
72 if !predicate(ch) {
73 break;
74 }
75 self.consume();
76 }
77
78 &self.input[start..self.cursor]
79 }
80
81 fn next_atom(&mut self) -> &'a str {
82 self.next_while(|ch| !is_terminator(ch))
83 }
84
85 fn next_string(&mut self) -> Result<&'a str> {
86 debug_assert_eq!(self.peek(), Some('"'));
87 self.consume();
88
89 let start = self.cursor;
90
91 while let Some(ch) = self.peek() {
92 match ch {
93 '"' => {
94 let string = &self.input[start..self.cursor];
95 self.consume();
96 return Ok(string);
97 }
98 '\n' => return Err(Error::UnclosedString),
99 '\\' => {
100 self.consume();
101 self.consume();
102 }
103 _ => {
104 self.consume();
105 }
106 }
107 }
108
109 Err(Error::UnclosedString)
110 }
111}
112
113impl<'a> Iterator for Lexer<'a> {
114 type Item = Span<Result<Token<'a>>>;
115
116 fn next(&mut self) -> Option<Self::Item> {
117 loop {
118 match self.peek()? {
119 ch if ch.is_whitespace() => {
120 self.next_while(char::is_whitespace);
121 }
122 ';' => {
123 self.next_while(|ch| ch != '\n');
124 }
125 _ => break,
126 }
127 }
128
129 let start = Pos::new(self.line, self.column, self.cursor);
130
131 let token = match self.peek()? {
132 '(' => {
133 self.consume();
134 Ok(Token::LeftPar)
135 }
136 ')' => {
137 self.consume();
138 Ok(Token::RightPar)
139 }
140 '\'' => {
141 self.consume();
142 Ok(Token::Quote)
143 }
144
145 // Number
146 ch if ch.is_ascii_digit()
147 || matches!(ch, '+' | '-' | '.')
148 && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit()) =>
149 {
150 Ok(Token::Number(self.next_atom()))
151 }
152
153 // String
154 '"' => self.next_string().map(Token::String),
155
156 // Symbol
157 _ => Ok(Token::Symbol(self.next_atom())),
158 };
159
160 let end = Pos::new(self.line, self.column, self.cursor);
161 Some(Span::new(token, start, end))
162 }
163}