diff options
| author | Tolmachev Igor <me@igorek.dev> | 2026-05-11 08:34:22 +0300 |
|---|---|---|
| committer | Tolmachev Igor <me@igorek.dev> | 2026-05-11 08:34:22 +0300 |
| commit | abda8d00117072f7c03f57eaeca9cf44427078dc (patch) | |
| tree | a7caf8c91932ce195398dbd63758a057720366a1 /compiler/src/ast/parser.rs | |
| parent | 7163aaebc993591db1cb4d7ae2be31669a0cb9a7 (diff) | |
| download | crisp-abda8d00117072f7c03f57eaeca9cf44427078dc.tar.gz crisp-abda8d00117072f7c03f57eaeca9cf44427078dc.zip | |
Replace generic list AST with typed expression tree
Each form (fn, const, let, for, set, do, call) now has its own variant with named fields instead of
being a plain list.
Diffstat (limited to 'compiler/src/ast/parser.rs')
| -rw-r--r-- | compiler/src/ast/parser.rs | 303 |
1 file changed, 253 insertions, 50 deletions
diff --git a/compiler/src/ast/parser.rs b/compiler/src/ast/parser.rs index 4b4f949..12a6f0d 100644 --- a/compiler/src/ast/parser.rs +++ b/compiler/src/ast/parser.rs | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | use std::iter::Peekable; | 1 | use std::{iter::Peekable, rc::Rc}; |
| 2 | 2 | ||
| 3 | use crate::{ | 3 | use crate::{ |
| 4 | ast::{Ast, Atom, Error, Expr}, | 4 | ast::{Ast, Atom, Error, Expr, models::LetVar}, |
| 5 | lex::Token, | 5 | lex::Token, |
| 6 | span::{Pos, Span, Spanned}, | 6 | span::{Pos, Span, Spanned}, |
| 7 | }; | 7 | }; |
| @@ -10,7 +10,7 @@ pub(super) const MAX_DEPTH: usize = 256; // TODO: make it a compile flag | |||
| 10 | 10 | ||
| 11 | fn parse_number(number: &str) -> Result<Atom, Error> { | 11 | fn parse_number(number: &str) -> Result<Atom, Error> { |
| 12 | match number.parse() { | 12 | match number.parse() { |
| 13 | Ok(ok) => Ok(Atom::Integer(ok)), | 13 | Ok(ok) => Ok(Atom::Int(ok)), |
| 14 | Err(err) => Err(Error::InvalidIntegerLiteral(number.into(), err)), | 14 | Err(err) => Err(Error::InvalidIntegerLiteral(number.into(), err)), |
| 15 | } | 15 | } |
| 16 | } | 16 | } |
| @@ -41,7 +41,7 @@ fn parse_string(string: &str) -> Result<Atom, Error> { | |||
| 41 | return Err(Error::UnclosedString(string.into())); | 41 | return Err(Error::UnclosedString(string.into())); |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | Ok(Atom::String(result.into())) | 44 | Ok(Atom::Str(result.into())) |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | fn parse_symbol(symbol: &str) -> Atom { | 47 | fn parse_symbol(symbol: &str) -> Atom { |
| @@ -49,7 +49,7 @@ fn parse_symbol(symbol: &str) -> Atom { | |||
| 49 | "true" => Atom::Bool(true), | 49 | "true" => Atom::Bool(true), |
| 50 | "false" => Atom::Bool(false), | 50 | "false" => Atom::Bool(false), |
| 51 | "nil" => Atom::Nil, | 51 | "nil" => Atom::Nil, |
| 52 | _ => Atom::Symbol(symbol.into()), | 52 | _ => Atom::Sym(symbol.into()), |
| 53 | } | 53 | } |
| 54 | } | 54 | } |
| 55 | 55 | ||
| @@ -78,50 +78,275 @@ where | |||
| 78 | self.tokens.peek().copied() | 78 | self.tokens.peek().copied() |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | fn consume(&mut self) -> Option<Spanned<Token<'a>>> { | 81 | fn peek_token(&mut self) -> Result<Spanned<Token<'a>>, Spanned<Error>> { |
| 82 | self.tokens | 82 | match self.peek() { |
| 83 | .next() | 83 | Some(token) => Ok(token), |
| 84 | .inspect(|s| self.last_token_span = s.span) | 84 | None => Err(Spanned::new(Error::UnexpectedEof, self.last_token_span)), |
| 85 | } | ||
| 85 | } | 86 | } |
| 86 | 87 | ||
| 87 | fn parse_expr(&mut self) -> Result<Spanned<Expr>, Spanned<Error>> { | 88 | fn consume(&mut self) -> Result<(), Spanned<Error>> { |
| 88 | let Spanned { inner: token, span } = match self.peek() { | 89 | match self.tokens.next() { |
| 89 | Some(token) => token, | 90 | Some(token) => { |
| 90 | None => return Err(Spanned::new(Error::UnexpectedEof, self.last_token_span)), | 91 | let (token, span) = token.into_parts(); |
| 92 | |||
| 93 | if let Token::LeftPar = token { | ||
| 94 | self.depth = self.depth.saturating_add(1); | ||
| 95 | if self.depth > MAX_DEPTH { | ||
| 96 | return Err(Spanned::new(Error::RecursionLimit, span)); | ||
| 97 | } | ||
| 98 | } else if let Token::RightPar = token { | ||
| 99 | self.depth = self.depth.saturating_sub(1); | ||
| 100 | } | ||
| 101 | |||
| 102 | self.last_token_span = span; | ||
| 103 | Ok(()) | ||
| 104 | } | ||
| 105 | None => panic!("no tokens to consume"), | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | fn require_sym(&mut self, symbol: &str) -> Result<Span, Spanned<Error>> { | ||
| 110 | let (token, span) = self.peek_token()?.into_parts(); | ||
| 111 | self.consume()?; | ||
| 112 | match token { | ||
| 113 | Token::Symbol(s) if s == symbol => Ok(span), | ||
| 114 | _ => Err(Spanned::new(Error::UnexpectedToken, span)), | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | fn require_left_par(&mut self) -> Result<Span, Spanned<Error>> { | ||
| 119 | let (token, span) = self.peek_token()?.into_parts(); | ||
| 120 | self.consume()?; | ||
| 121 | match token { | ||
| 122 | Token::LeftPar => Ok(span), | ||
| 123 | _ => Err(Spanned::new(Error::UnexpectedToken, span)), | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | fn require_right_par(&mut self) -> Result<Span, Spanned<Error>> { | ||
| 128 | let (token, span) = self.peek_token()?.into_parts(); | ||
| 129 | self.consume()?; | ||
| 130 | match token { | ||
| 131 | Token::RightPar => Ok(span), | ||
| 132 | _ => Err(Spanned::new(Error::UnexpectedToken, span)), | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | fn parse_sym(&mut self) -> Result<Spanned<&'a str>, Spanned<Error>> { | ||
| 137 | let (token, span) = self.peek_token()?.into_parts(); | ||
| 138 | self.consume()?; | ||
| 139 | match token { | ||
| 140 | Token::Symbol(symbol) => Ok(Spanned::new(symbol, span)), | ||
| 141 | _ => Err(Spanned::new(Error::UnexpectedToken, span)), | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | fn parse_args(&mut self, open_span: Span) -> Result<Vec<Spanned<Rc<str>>>, Spanned<Error>> { | ||
| 146 | let mut args = Vec::new(); | ||
| 147 | while let Some(token) = self.peek() { | ||
| 148 | let (token, span) = token.into_parts(); | ||
| 149 | |||
| 150 | self.consume()?; | ||
| 151 | match token { | ||
| 152 | Token::Symbol(symbol) => args.push(Spanned::new(symbol.into(), span)), | ||
| 153 | Token::RightPar => return Ok(args), | ||
| 154 | _ => return Err(Spanned::new(Error::UnexpectedToken, span)), | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | Err(Spanned::new(Error::UnclosedPar, open_span)) | ||
| 159 | } | ||
| 160 | |||
| 161 | fn parse_body( | ||
| 162 | &mut self, | ||
| 163 | open_span: Span, | ||
| 164 | non_empty: bool, | ||
| 165 | ) -> Result<Vec<Spanned<Expr>>, Spanned<Error>> { | ||
| 166 | let mut body = Vec::new(); | ||
| 167 | if non_empty { | ||
| 168 | body.push(self.parse_expr()?); | ||
| 169 | } | ||
| 170 | while let Some(token) = self.peek() { | ||
| 171 | match token.inner { | ||
| 172 | Token::RightPar => { | ||
| 173 | self.consume()?; | ||
| 174 | return Ok(body); | ||
| 175 | } | ||
| 176 | _ => body.push(self.parse_expr()?), | ||
| 177 | } | ||
| 178 | } | ||
| 179 | |||
| 180 | Err(Spanned::new(Error::UnclosedPar, open_span)) | ||
| 181 | } | ||
| 182 | |||
| 183 | fn parse_var(&mut self) -> Result<Spanned<LetVar>, Spanned<Error>> { | ||
| 184 | let open_span = self.require_left_par()?; | ||
| 185 | let name = self.parse_sym()?.map(Into::into); | ||
| 186 | let expr = self.parse_expr()?; | ||
| 187 | let close_span = self.require_right_par()?; | ||
| 188 | |||
| 189 | let let_var = LetVar { name, expr }; | ||
| 190 | let span = Span::new(open_span.start, close_span.end); | ||
| 191 | Ok(Spanned::new(let_var, span)) | ||
| 192 | } | ||
| 193 | |||
| 194 | fn parse_vars(&mut self, open_span: Span) -> Result<Vec<Spanned<LetVar>>, Spanned<Error>> { | ||
| 195 | let mut vars = Vec::new(); | ||
| 196 | vars.push(self.parse_var()?); | ||
| 197 | |||
| 198 | while let Some(token) = self.peek() { | ||
| 199 | let (token, span) = token.into_parts(); | ||
| 200 | match token { | ||
| 201 | Token::LeftPar => vars.push(self.parse_var()?), | ||
| 202 | Token::RightPar => { | ||
| 203 | self.consume()?; | ||
| 204 | return Ok(vars); | ||
| 205 | } | ||
| 206 | _ => return Err(Spanned::new(Error::UnexpectedToken, span)), | ||
| 207 | } | ||
| 208 | } | ||
| 209 | |||
| 210 | Err(Spanned::new(Error::UnclosedPar, open_span)) | ||
| 211 | } | ||
| 212 | |||
| 213 | fn parse_fn(&mut self, open_span: Span) -> Result<Spanned<Expr>, Spanned<Error>> { | ||
| 214 | self.consume()?; | ||
| 215 | |||
| 216 | let name = self.parse_sym()?.map(Into::into); | ||
| 217 | let args_open_span = self.require_left_par()?; | ||
| 218 | let args = self.parse_args(args_open_span)?; | ||
| 219 | |||
| 220 | let body = self.parse_body(open_span, true)?; | ||
| 221 | let function = Expr::Fn { name, args, body }; | ||
| 222 | let span = Span::new(open_span.start, self.last_token_span.end); | ||
| 223 | |||
| 224 | Ok(Spanned::new(function, span)) | ||
| 225 | } | ||
| 226 | |||
| 227 | fn parse_const(&mut self, open_span: Span) -> Result<Spanned<Expr>, Spanned<Error>> { | ||
| 228 | self.consume()?; | ||
| 229 | |||
| 230 | let vars = self.parse_vars(open_span)?; | ||
| 231 | let constant = Expr::Const { vars }; | ||
| 232 | let span = Span::new(open_span.start, self.last_token_span.end); | ||
| 233 | Ok(Spanned::new(constant, span)) | ||
| 234 | } | ||
| 235 | |||
| 236 | fn parse_let(&mut self, open_span: Span) -> Result<Spanned<Expr>, Spanned<Error>> { | ||
| 237 | self.consume()?; | ||
| 238 | |||
| 239 | let vars_span = self.require_left_par()?; | ||
| 240 | let vars = self.parse_vars(vars_span)?; | ||
| 241 | let body = self.parse_body(open_span, true)?; | ||
| 242 | let let_vars = Expr::Let { vars, body }; | ||
| 243 | let span = Span::new(open_span.start, self.last_token_span.end); | ||
| 244 | Ok(Spanned::new(let_vars, span)) | ||
| 245 | } | ||
| 246 | |||
| 247 | fn parse_for(&mut self, open_span: Span) -> Result<Spanned<Expr>, Spanned<Error>> { | ||
| 248 | self.consume()?; | ||
| 249 | |||
| 250 | let loop_var = self.parse_sym()?.map(Into::into); | ||
| 251 | self.require_sym("from")?; | ||
| 252 | let from = self.parse_expr()?.map(Box::new); | ||
| 253 | self.require_sym("to")?; | ||
| 254 | let to = self.parse_expr()?.map(Box::new); | ||
| 255 | let body = self.parse_body(open_span, true)?; | ||
| 256 | |||
| 257 | let for_loop = Expr::For { | ||
| 258 | loop_var, | ||
| 259 | from, | ||
| 260 | to, | ||
| 261 | body, | ||
| 91 | }; | 262 | }; |
| 263 | let span = Span::new(open_span.start, self.last_token_span.end); | ||
| 264 | Ok(Spanned::new(for_loop, span)) | ||
| 265 | } | ||
| 266 | |||
| 267 | fn parse_set(&mut self, open_span: Span) -> Result<Spanned<Expr>, Spanned<Error>> { | ||
| 268 | self.consume()?; | ||
| 269 | |||
| 270 | let target_var = self.parse_sym()?.map(Into::into); | ||
| 271 | let expr = self.parse_expr()?.map(Box::new); | ||
| 272 | self.require_right_par()?; | ||
| 273 | |||
| 274 | let set = Expr::Set { target_var, expr }; | ||
| 275 | let span = Span::new(open_span.start, self.last_token_span.end); | ||
| 276 | Ok(Spanned::new(set, span)) | ||
| 277 | } | ||
| 278 | |||
| 279 | fn parse_do(&mut self, open_span: Span) -> Result<Spanned<Expr>, Spanned<Error>> { | ||
| 280 | self.consume()?; | ||
| 281 | |||
| 282 | let body = self.parse_body(open_span, true)?; | ||
| 283 | |||
| 284 | let do_body = Expr::Do { body }; | ||
| 285 | let span = Span::new(open_span.start, self.last_token_span.end); | ||
| 286 | Ok(Spanned::new(do_body, span)) | ||
| 287 | } | ||
| 288 | |||
| 289 | fn parse_call(&mut self, open_span: Span) -> Result<Spanned<Expr>, Spanned<Error>> { | ||
| 290 | let fn_name = self.parse_sym()?.map(Into::into); | ||
| 291 | let args = self.parse_body(open_span, false)?; | ||
| 292 | |||
| 293 | let call = Expr::Call { fn_name, args }; | ||
| 294 | let span = Span::new(open_span.start, self.last_token_span.end); | ||
| 295 | Ok(Spanned::new(call, span)) | ||
| 296 | } | ||
| 297 | |||
| 298 | fn parse_special_form(&mut self, open_span: Span) -> Result<Spanned<Expr>, Spanned<Error>> { | ||
| 299 | let (token, span) = self.peek_token()?.into_parts(); | ||
| 300 | |||
| 301 | let symbol = match token { | ||
| 302 | Token::Symbol(symbol) => symbol, | ||
| 303 | Token::RightPar => { | ||
| 304 | self.consume()?; | ||
| 305 | let span = Span::new(open_span.start, span.end); | ||
| 306 | return Ok(Spanned::new(Expr::Atom(Atom::Nil), span)); | ||
| 307 | } | ||
| 308 | _ => return Err(Spanned::new(Error::UnexpectedToken, span)), | ||
| 309 | }; | ||
| 310 | |||
| 311 | match symbol { | ||
| 312 | "fn" => self.parse_fn(open_span), | ||
| 313 | "const" => self.parse_const(open_span), | ||
| 314 | "let" => self.parse_let(open_span), | ||
| 315 | "for" => self.parse_for(open_span), | ||
| 316 | "set" => self.parse_set(open_span), | ||
| 317 | "do" => self.parse_do(open_span), | ||
| 318 | _ => self.parse_call(open_span), | ||
| 319 | } | ||
| 320 | } | ||
| 321 | |||
| 322 | fn parse_expr(&mut self) -> Result<Spanned<Expr>, Spanned<Error>> { | ||
| 323 | let (token, span) = self.peek_token()?.into_parts(); | ||
| 92 | 324 | ||
| 93 | let expr = match token { | 325 | let expr = match token { |
| 94 | Token::LeftPar => { | 326 | Token::LeftPar => { |
| 95 | self.consume(); | 327 | self.consume()?; |
| 96 | let list = self.parse_list(span)?; | 328 | self.parse_special_form(span)? |
| 97 | let expr = if !list.is_empty() { | ||
| 98 | Expr::List(list) | ||
| 99 | } else { | ||
| 100 | Expr::Atom(Atom::Nil) | ||
| 101 | }; | ||
| 102 | |||
| 103 | Spanned::new(expr, Span::new(span.start, self.last_token_span.end)) | ||
| 104 | } | 329 | } |
| 105 | Token::RightPar => { | 330 | Token::RightPar => { |
| 106 | self.consume(); | 331 | self.consume()?; |
| 107 | return Err(Spanned::new(Error::UnexpectedRightPar, span)); | 332 | return Err(Spanned::new(Error::UnexpectedClosePar, span)); |
| 108 | } | 333 | } |
| 109 | Token::Number(number) => { | 334 | Token::Number(number) => { |
| 110 | self.consume(); | 335 | self.consume()?; |
| 111 | let atom = parse_number(number).map_err(|e| Spanned::new(e, span))?; | 336 | let atom = parse_number(number).map_err(|e| Spanned::new(e, span))?; |
| 112 | Spanned::new(Expr::Atom(atom), span) | 337 | Spanned::new(Expr::Atom(atom), span) |
| 113 | } | 338 | } |
| 114 | Token::String(string) => { | 339 | Token::String(string) => { |
| 115 | self.consume(); | 340 | self.consume()?; |
| 116 | let atom = parse_string(string).map_err(|e| Spanned::new(e, span))?; | 341 | let atom = parse_string(string).map_err(|e| Spanned::new(e, span))?; |
| 117 | Spanned::new(Expr::Atom(atom), span) | 342 | Spanned::new(Expr::Atom(atom), span) |
| 118 | } | 343 | } |
| 119 | Token::UnclosedString(string) => { | 344 | Token::UnclosedString(string) => { |
| 120 | self.consume(); | 345 | self.consume()?; |
| 121 | return Err(Spanned::new(Error::UnclosedString(string.into()), span)); | 346 | return Err(Spanned::new(Error::UnclosedString(string.into()), span)); |
| 122 | } | 347 | } |
| 123 | Token::Symbol(symbol) => { | 348 | Token::Symbol(symbol) => { |
| 124 | self.consume(); | 349 | self.consume()?; |
| 125 | let atom = parse_symbol(symbol); | 350 | let atom = parse_symbol(symbol); |
| 126 | Spanned::new(Expr::Atom(atom), span) | 351 | Spanned::new(Expr::Atom(atom), span) |
| 127 | } | 352 | } |
| @@ -130,28 +355,6 @@ where | |||
| 130 | Ok(expr) | 355 | Ok(expr) |
| 131 | } | 356 | } |
| 132 | 357 | ||
| 133 | fn parse_list(&mut self, left_par_span: Span) -> Result<Vec<Spanned<Expr>>, Spanned<Error>> { | ||
| 134 | let mut list = Vec::new(); | ||
| 135 | |||
| 136 | self.depth += 1; | ||
| 137 | if self.depth >= MAX_DEPTH { | ||
| 138 | return Err(Spanned::new(Error::RecursionLimit, self.last_token_span)); | ||
| 139 | } | ||
| 140 | |||
| 141 | while let Some(Spanned { inner: token, .. }) = self.peek() { | ||
| 142 | match token { | ||
| 143 | Token::RightPar => { | ||
| 144 | self.consume(); | ||
| 145 | return Ok(list); | ||
| 146 | } | ||
| 147 | _ => list.push(self.parse_expr()?), | ||
| 148 | } | ||
| 149 | } | ||
| 150 | self.depth -= 1; | ||
| 151 | |||
| 152 | Err(Spanned::new(Error::UnclosedLeftPar, left_par_span)) | ||
| 153 | } | ||
| 154 | |||
| 155 | pub fn parse(mut self) -> Result<Ast, Spanned<Error>> { | 358 | pub fn parse(mut self) -> Result<Ast, Spanned<Error>> { |
| 156 | let mut ast = Vec::new(); | 359 | let mut ast = Vec::new(); |
| 157 | 360 | ||
