aboutsummaryrefslogtreecommitdiff
path: root/compiler/src/lex/lexer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/src/lex/lexer.rs')
-rw-r--r--compiler/src/lex/lexer.rs161
1 files changed, 161 insertions, 0 deletions
diff --git a/compiler/src/lex/lexer.rs b/compiler/src/lex/lexer.rs
new file mode 100644
index 0000000..801d382
--- /dev/null
+++ b/compiler/src/lex/lexer.rs
@@ -0,0 +1,161 @@
1use crate::{
2 lex::Token,
3 span::{Pos, Span, Spanned},
4};
5
/// Reports whether `ch` ends the atom (number or symbol) currently being read.
///
/// An atom stops at any Unicode whitespace character and at each of the
/// delimiters `(`, `)`, `'`, `"` and `;`.
fn is_terminator(ch: char) -> bool {
    match ch {
        '(' | ')' | '\'' | '"' | ';' => true,
        other => other.is_whitespace(),
    }
}
9
/// Cursor over a source string that tracks both the byte offset and the
/// line/column position of the next unread character.
///
/// `Clone` lets a caller snapshot the lexer before speculative lookahead and
/// restore it afterwards; `Debug` makes the cursor state printable in
/// diagnostics and test failures.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// The complete source text being scanned.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    cursor: usize,

    /// Line of the next unread character; incremented each time a `'\n'` is
    /// consumed.
    line: usize,
    /// Number of characters consumed since the start of the current line.
    column: usize,
}
17
18impl<'a> Lexer<'a> {
19 pub fn new(input: &'a str) -> Self {
20 Self {
21 input,
22 cursor: 0,
23
24 line: 1,
25 column: 0,
26 }
27 }
28
29 fn rest(&self) -> &str {
30 &self.input[self.cursor..]
31 }
32
33 fn peek(&self) -> Option<char> {
34 self.rest().chars().next()
35 }
36
37 fn peek_nth(&self, n: usize) -> Option<char> {
38 self.rest().chars().nth(n)
39 }
40
41 fn consume(&mut self) -> Option<char> {
42 let ch = self.peek()?;
43
44 self.cursor += ch.len_utf8();
45 if ch == '\n' {
46 self.line += 1;
47 self.column = 0;
48 } else {
49 self.column += 1;
50 }
51
52 Some(ch)
53 }
54
55 fn next_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
56 let start = self.cursor;
57
58 while let Some(ch) = self.peek() {
59 if !predicate(ch) {
60 break;
61 }
62 self.consume();
63 }
64
65 &self.input[start..self.cursor]
66 }
67
68 fn next_atom(&mut self) -> &'a str {
69 self.next_while(|ch| !is_terminator(ch))
70 }
71
72 fn next_string(&mut self) -> Result<&'a str, &'a str> {
73 debug_assert_eq!(self.peek(), Some('"'));
74 self.consume();
75
76 let start = self.cursor;
77
78 while let Some(ch) = self.peek() {
79 match ch {
80 '"' => {
81 let string = &self.input[start..self.cursor];
82 self.consume();
83 return Ok(string);
84 }
85 '\n' => {
86 let string = &self.input[start..self.cursor];
87 self.consume();
88 return Err(string);
89 }
90 '\\' => {
91 self.consume();
92 self.consume();
93 }
94 _ => {
95 self.consume();
96 }
97 }
98 }
99
100 Err(&self.input[start..self.cursor])
101 }
102}
103
104impl<'a> Iterator for Lexer<'a> {
105 type Item = Spanned<Token<'a>>;
106
107 fn next(&mut self) -> Option<Self::Item> {
108 loop {
109 match self.peek()? {
110 ch if ch.is_whitespace() => {
111 self.next_while(char::is_whitespace);
112 }
113 ';' => {
114 self.next_while(|ch| ch != '\n');
115 }
116 _ => break,
117 }
118 }
119
120 let start = Pos::new(self.line, self.column, self.cursor);
121
122 let token = match self.peek()? {
123 '(' => {
124 self.consume();
125 Token::LeftPar
126 }
127 ')' => {
128 self.consume();
129 Token::RightPar
130 }
131 '\'' => {
132 self.consume();
133 Token::Quote
134 }
135
136 // Number
137 ch if ch.is_ascii_digit()
138 || ch == '.' && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit())
139 || matches!(ch, '+' | '-')
140 && self.peek_nth(1).is_some_and(|ch| ch.is_ascii_digit())
141 || matches!(ch, '+' | '-')
142 && self.peek_nth(1).is_some_and(|ch| ch == '.')
143 && self.peek_nth(2).is_some_and(|ch| ch.is_ascii_digit()) =>
144 {
145 Token::Number(self.next_atom())
146 }
147
148 // String
149 '"' => match self.next_string() {
150 Ok(string) => Token::String(string),
151 Err(string) => Token::UnclosedString(string),
152 },
153
154 // Symbol
155 _ => Token::Symbol(self.next_atom()),
156 };
157
158 let end = Pos::new(self.line, self.column, self.cursor);
159 Some(Spanned::new(token, Span::new(start, end)))
160 }
161}