From abda8d00117072f7c03f57eaeca9cf44427078dc Mon Sep 17 00:00:00 2001 From: Tolmachev Igor Date: Mon, 11 May 2026 08:34:22 +0300 Subject: Replace generic list AST with typed expression tree Each form (fn, let, for, set, do, call) now has its own variant with named fields instead of being a plain list. --- compiler/src/ast/parser.rs | 303 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 253 insertions(+), 50 deletions(-) (limited to 'compiler/src/ast/parser.rs') diff --git a/compiler/src/ast/parser.rs b/compiler/src/ast/parser.rs index 4b4f949..12a6f0d 100644 --- a/compiler/src/ast/parser.rs +++ b/compiler/src/ast/parser.rs @@ -1,7 +1,7 @@ -use std::iter::Peekable; +use std::{iter::Peekable, rc::Rc}; use crate::{ - ast::{Ast, Atom, Error, Expr}, + ast::{Ast, Atom, Error, Expr, models::LetVar}, lex::Token, span::{Pos, Span, Spanned}, }; @@ -10,7 +10,7 @@ pub(super) const MAX_DEPTH: usize = 256; // TODO: make it a compile flag fn parse_number(number: &str) -> Result { match number.parse() { - Ok(ok) => Ok(Atom::Integer(ok)), + Ok(ok) => Ok(Atom::Int(ok)), Err(err) => Err(Error::InvalidIntegerLiteral(number.into(), err)), } } @@ -41,7 +41,7 @@ fn parse_string(string: &str) -> Result { return Err(Error::UnclosedString(string.into())); } - Ok(Atom::String(result.into())) + Ok(Atom::Str(result.into())) } fn parse_symbol(symbol: &str) -> Atom { @@ -49,7 +49,7 @@ fn parse_symbol(symbol: &str) -> Atom { "true" => Atom::Bool(true), "false" => Atom::Bool(false), "nil" => Atom::Nil, - _ => Atom::Symbol(symbol.into()), + _ => Atom::Sym(symbol.into()), } } @@ -78,50 +78,275 @@ where self.tokens.peek().copied() } - fn consume(&mut self) -> Option>> { - self.tokens - .next() - .inspect(|s| self.last_token_span = s.span) + fn peek_token(&mut self) -> Result>, Spanned> { + match self.peek() { + Some(token) => Ok(token), + None => Err(Spanned::new(Error::UnexpectedEof, self.last_token_span)), + } } - fn parse_expr(&mut self) -> Result, Spanned> { - let Spanned { inner: token, span } = match self.peek() { - Some(token) => token, - None => return Err(Spanned::new(Error::UnexpectedEof, self.last_token_span)), + fn consume(&mut self) -> Result<(), Spanned> { + match self.tokens.next() { + Some(token) => { + let (token, span) = token.into_parts(); + + if let Token::LeftPar = token { + self.depth = self.depth.saturating_add(1); + if self.depth > MAX_DEPTH { + return Err(Spanned::new(Error::RecursionLimit, span)); + } + } else if let Token::RightPar = token { + self.depth = self.depth.saturating_sub(1); + } + + self.last_token_span = span; + Ok(()) + } + None => panic!("no tokens to consume"), + } + } + + fn require_sym(&mut self, symbol: &str) -> Result> { + let (token, span) = self.peek_token()?.into_parts(); + self.consume()?; + match token { + Token::Symbol(s) if s == symbol => Ok(span), + _ => Err(Spanned::new(Error::UnexpectedToken, span)), + } + } + + fn require_left_par(&mut self) -> Result> { + let (token, span) = self.peek_token()?.into_parts(); + self.consume()?; + match token { + Token::LeftPar => Ok(span), + _ => Err(Spanned::new(Error::UnexpectedToken, span)), + } + } + + fn require_right_par(&mut self) -> Result> { + let (token, span) = self.peek_token()?.into_parts(); + self.consume()?; + match token { + Token::RightPar => Ok(span), + _ => Err(Spanned::new(Error::UnexpectedToken, span)), + } + } + + fn parse_sym(&mut self) -> Result, Spanned> { + let (token, span) = self.peek_token()?.into_parts(); + self.consume()?; + match token { + Token::Symbol(symbol) => Ok(Spanned::new(symbol, span)), + _ => Err(Spanned::new(Error::UnexpectedToken, span)), + } + } + + fn parse_args(&mut self, open_span: Span) -> Result>>, Spanned> { + let mut args = Vec::new(); + while let Some(token) = self.peek() { + let (token, span) = token.into_parts(); + + self.consume()?; + match token { + Token::Symbol(symbol) => args.push(Spanned::new(symbol.into(), span)), + Token::RightPar => return Ok(args), + _ => return Err(Spanned::new(Error::UnexpectedToken, span)), + } + } + + Err(Spanned::new(Error::UnclosedPar, open_span)) + } + + fn parse_body( + &mut self, + open_span: Span, + non_empty: bool, + ) -> Result>, Spanned> { + let mut body = Vec::new(); + if non_empty { + body.push(self.parse_expr()?); + } + while let Some(token) = self.peek() { + match token.inner { + Token::RightPar => { + self.consume()?; + return Ok(body); + } + _ => body.push(self.parse_expr()?), + } + } + + Err(Spanned::new(Error::UnclosedPar, open_span)) + } + + fn parse_var(&mut self) -> Result, Spanned> { + let open_span = self.require_left_par()?; + let name = self.parse_sym()?.map(Into::into); + let expr = self.parse_expr()?; + let close_span = self.require_right_par()?; + + let let_var = LetVar { name, expr }; + let span = Span::new(open_span.start, close_span.end); + Ok(Spanned::new(let_var, span)) + } + + fn parse_vars(&mut self, open_span: Span) -> Result>, Spanned> { + let mut vars = Vec::new(); + vars.push(self.parse_var()?); + + while let Some(token) = self.peek() { + let (token, span) = token.into_parts(); + match token { + Token::LeftPar => vars.push(self.parse_var()?), + Token::RightPar => { + self.consume()?; + return Ok(vars); + } + _ => return Err(Spanned::new(Error::UnexpectedToken, span)), + } + } + + Err(Spanned::new(Error::UnclosedPar, open_span)) + } + + fn parse_fn(&mut self, open_span: Span) -> Result, Spanned> { + self.consume()?; + + let name = self.parse_sym()?.map(Into::into); + let args_open_span = self.require_left_par()?; + let args = self.parse_args(args_open_span)?; + + let body = self.parse_body(open_span, true)?; + let function = Expr::Fn { name, args, body }; + let span = Span::new(open_span.start, self.last_token_span.end); + + Ok(Spanned::new(function, span)) + } + + fn parse_const(&mut self, open_span: Span) -> Result, Spanned> { + self.consume()?; + + let vars = self.parse_vars(open_span)?; + let constant = Expr::Const { vars }; + let span = Span::new(open_span.start, self.last_token_span.end); + Ok(Spanned::new(constant, span)) + } + + fn parse_let(&mut self, open_span: Span) -> Result, Spanned> { + self.consume()?; + + let vars_span = self.require_left_par()?; + let vars = self.parse_vars(vars_span)?; + let body = self.parse_body(open_span, true)?; + let let_vars = Expr::Let { vars, body }; + let span = Span::new(open_span.start, self.last_token_span.end); + Ok(Spanned::new(let_vars, span)) + } + + fn parse_for(&mut self, open_span: Span) -> Result, Spanned> { + self.consume()?; + + let loop_var = self.parse_sym()?.map(Into::into); + self.require_sym("from")?; + let from = self.parse_expr()?.map(Box::new); + self.require_sym("to")?; + let to = self.parse_expr()?.map(Box::new); + let body = self.parse_body(open_span, true)?; + + let for_loop = Expr::For { + loop_var, + from, + to, + body, }; + let span = Span::new(open_span.start, self.last_token_span.end); + Ok(Spanned::new(for_loop, span)) + } + + fn parse_set(&mut self, open_span: Span) -> Result, Spanned> { + self.consume()?; + + let target_var = self.parse_sym()?.map(Into::into); + let expr = self.parse_expr()?.map(Box::new); + self.require_right_par()?; + + let set = Expr::Set { target_var, expr }; + let span = Span::new(open_span.start, self.last_token_span.end); + Ok(Spanned::new(set, span)) + } + + fn parse_do(&mut self, open_span: Span) -> Result, Spanned> { + self.consume()?; + + let body = self.parse_body(open_span, true)?; + + let do_body = Expr::Do { body }; + let span = Span::new(open_span.start, self.last_token_span.end); + Ok(Spanned::new(do_body, span)) + } + + fn parse_call(&mut self, open_span: Span) -> Result, Spanned> { + let fn_name = self.parse_sym()?.map(Into::into); + let args = self.parse_body(open_span, false)?; + + let call = Expr::Call { fn_name, args }; + let span = Span::new(open_span.start, self.last_token_span.end); + Ok(Spanned::new(call, span)) + } + + fn parse_special_form(&mut self, open_span: Span) -> Result, Spanned> { + let (token, span) = self.peek_token()?.into_parts(); + + let symbol = match token { + Token::Symbol(symbol) => symbol, + Token::RightPar => { + self.consume()?; + let span = Span::new(open_span.start, span.end); + return Ok(Spanned::new(Expr::Atom(Atom::Nil), span)); + } + _ => return Err(Spanned::new(Error::UnexpectedToken, span)), + }; + + match symbol { + "fn" => self.parse_fn(open_span), + "const" => self.parse_const(open_span), + "let" => self.parse_let(open_span), + "for" => self.parse_for(open_span), + "set" => self.parse_set(open_span), + "do" => self.parse_do(open_span), + _ => self.parse_call(open_span), + } + } + + fn parse_expr(&mut self) -> Result, Spanned> { + let (token, span) = self.peek_token()?.into_parts(); let expr = match token { Token::LeftPar => { - self.consume(); - let list = self.parse_list(span)?; - let expr = if !list.is_empty() { - Expr::List(list) - } else { - Expr::Atom(Atom::Nil) - }; - - Spanned::new(expr, Span::new(span.start, self.last_token_span.end)) + self.consume()?; + self.parse_special_form(span)? } Token::RightPar => { - self.consume(); - return Err(Spanned::new(Error::UnexpectedRightPar, span)); + self.consume()?; + return Err(Spanned::new(Error::UnexpectedClosePar, span)); } Token::Number(number) => { - self.consume(); + self.consume()?; let atom = parse_number(number).map_err(|e| Spanned::new(e, span))?; Spanned::new(Expr::Atom(atom), span) } Token::String(string) => { - self.consume(); + self.consume()?; let atom = parse_string(string).map_err(|e| Spanned::new(e, span))?; Spanned::new(Expr::Atom(atom), span) } Token::UnclosedString(string) => { - self.consume(); + self.consume()?; return Err(Spanned::new(Error::UnclosedString(string.into()), span)); } Token::Symbol(symbol) => { - self.consume(); + self.consume()?; let atom = parse_symbol(symbol); Spanned::new(Expr::Atom(atom), span) } @@ -130,28 +355,6 @@ where Ok(expr) } - fn parse_list(&mut self, left_par_span: Span) -> Result>, Spanned> { - let mut list = Vec::new(); - - self.depth += 1; - if self.depth >= MAX_DEPTH { - return Err(Spanned::new(Error::RecursionLimit, self.last_token_span)); - } - - while let Some(Spanned { inner: token, .. }) = self.peek() { - match token { - Token::RightPar => { - self.consume(); - return Ok(list); - } - _ => list.push(self.parse_expr()?), - } - } - self.depth -= 1; - - Err(Spanned::new(Error::UnclosedLeftPar, left_par_span)) - } - pub fn parse(mut self) -> Result> { let mut ast = Vec::new(); -- cgit v1.3