From f6983686a66c3b8941af471d78642b307eb26f8e Mon Sep 17 00:00:00 2001
From: Tolmachev Igor
Date: Sat, 9 May 2026 00:34:22 +0300
Subject: Add AST parser skeleton

---
 compiler/src/ast/error.rs  |  12 +++
 compiler/src/ast/mod.rs    |  27 +++++++
 compiler/src/ast/parser.rs | 182 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 221 insertions(+)
 create mode 100644 compiler/src/ast/error.rs
 create mode 100644 compiler/src/ast/mod.rs
 create mode 100644 compiler/src/ast/parser.rs

(limited to 'compiler/src')

diff --git a/compiler/src/ast/error.rs b/compiler/src/ast/error.rs
new file mode 100644
index 0000000..11f552d
--- /dev/null
+++ b/compiler/src/ast/error.rs
@@ -0,0 +1,12 @@
+use std::{error, fmt};
+
+#[derive(Debug)]
+pub enum Error {}
+
+impl fmt::Display for Error {
+    fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result {
+        todo!()
+    }
+}
+
+impl error::Error for Error {}
diff --git a/compiler/src/ast/mod.rs b/compiler/src/ast/mod.rs
new file mode 100644
index 0000000..4587ea7
--- /dev/null
+++ b/compiler/src/ast/mod.rs
@@ -0,0 +1,27 @@
+mod error;
+mod parser;
+
+use std::rc::Rc;
+
+use crate::span::Spanned;
+pub use error::Error;
+pub use parser::Parser;
+
+#[derive(Clone, Debug)]
+pub enum Atom {
+    Float(f64),
+    Integer(i64),
+    String(Rc<str>),
+    Symbol(Rc<str>),
+    Bool(bool),
+    Nil,
+}
+
+#[derive(Clone, Debug)]
+pub enum Expr {
+    Atom(Atom),
+    List(Vec<Spanned<Expr>>),
+}
+
+#[derive(Clone, Debug)]
+pub struct Program(pub Vec<Spanned<Expr>>);
diff --git a/compiler/src/ast/parser.rs b/compiler/src/ast/parser.rs
new file mode 100644
index 0000000..171ecfe
--- /dev/null
+++ b/compiler/src/ast/parser.rs
@@ -0,0 +1,182 @@
+use std::{iter::Peekable, result};
+
+use crate::{
+    ast::{Atom, Error, Expr, Program},
+    lexer::Token,
+    span::{Pos, Span, Spanned},
+};
+
+type Result<T> = result::Result<T, Spanned<Error>>;
+
+fn parse_number(number: &str) -> Atom {
+    let is_float = number.bytes().any(|b| matches!(b, b'.' | b'e' | b'E'));
+
+    if is_float {
+        match number.parse() {
+            Ok(ok) => Atom::Float(ok),
+            Err(err) => todo!("invalid float literal {number}: {err}"),
+        }
+    } else {
+        match number.parse() {
+            Ok(ok) => Atom::Integer(ok),
+            Err(err) => todo!("invalid integer literal {number}: {err}"),
+        }
+    }
+}
+
+fn parse_string(string: &str) -> Atom {
+    let mut result = String::new();
+    let mut is_escape = false;
+
+    for ch in string.chars() {
+        if !is_escape {
+            match ch {
+                '\\' => is_escape = true,
+                _ => result.push(ch),
+            }
+        } else {
+            match ch {
+                '"' => result.push('"'),
+                'n' => result.push('\n'),
+                '\\' => result.push('\\'),
+                '\n' => {}
+                _ => todo!("unexpected escape char {ch:?}"),
+            }
+            is_escape = false;
+        }
+    }
+
+    if is_escape {
+        todo!("unclosed string");
+    }
+
+    Atom::String(result.into())
+}
+
+fn parse_symbol(symbol: &str) -> Atom {
+    match symbol {
+        "true" => Atom::Bool(true),
+        "false" => Atom::Bool(false),
+        "nil" => Atom::Nil,
+        _ => Atom::Symbol(symbol.into()),
+    }
+}
+
+pub struct Parser<'a, I>
+where
+    I: Iterator<Item = Spanned<Token<'a>>>,
+{
+    tokens: Peekable<I>,
+    cursor: Pos,
+}
+
+impl<'a, I> Parser<'a, I>
+where
+    I: Iterator<Item = Spanned<Token<'a>>>,
+{
+    pub fn new(tokens: I) -> Self {
+        Self {
+            tokens: tokens.peekable(),
+            cursor: Pos::new(1, 0, 0),
+        }
+    }
+
+    fn peek(&mut self) -> Option<Spanned<Token<'a>>> {
+        self.tokens.peek().copied()
+    }
+
+    fn consume(&mut self) -> Option<Spanned<Token<'a>>> {
+        self.tokens.next().inspect(|s| self.cursor = s.span.end)
+    }
+
+    fn parse_expr(&mut self) -> Result<Spanned<Expr>> {
+        let Spanned { inner: token, span } = match self.peek() {
+            Some(spanned) => spanned,
+            None => todo!("unexpected eof"),
+        };
+
+        let expr = match token {
+            Token::LeftPar => {
+                self.consume();
+                let list = self.parse_list()?;
+                let expr = if !list.is_empty() {
+                    Expr::List(list)
+                } else {
+                    Expr::Atom(Atom::Nil)
+                };
+
+                Spanned::new(expr, Span::new(span.start, self.cursor))
+            }
+            Token::RightPar => todo!("unexpected par"),
+            Token::Quote => {
+                self.consume();
+                let quote = Spanned::new(
+                    Expr::Atom(Atom::Symbol("quote".into())),
+                    Span::new(span.start, self.cursor),
+                );
+                let expr = self.parse_expr()?;
+
+                Spanned::new(
+                    Expr::List(vec![quote, expr]),
+                    Span::new(span.start, self.cursor),
+                )
+            }
+            Token::Number(number) => {
+                self.consume();
+
+                Spanned::new(
+                    Expr::Atom(parse_number(number)),
+                    Span::new(span.start, self.cursor),
+                )
+            }
+            Token::String(string) => {
+                self.consume();
+
+                Spanned::new(
+                    Expr::Atom(parse_string(string)),
+                    Span::new(span.start, self.cursor),
+                )
+            }
+            Token::UnclosedString(string) => {
+                self.consume();
+                todo!("unclosed string {string:?}")
+            }
+            Token::Symbol(symbol) => {
+                self.consume();
+
+                Spanned::new(
+                    Expr::Atom(parse_symbol(symbol)),
+                    Span::new(span.start, self.cursor),
+                )
+            }
+        };
+
+        Ok(expr)
+    }
+
+    fn parse_list(&mut self) -> Result<Vec<Spanned<Expr>>> {
+        let mut list = Vec::new();
+
+        while let Some(Spanned { inner: token, .. }) = self.peek() {
+            match token {
+                Token::RightPar => {
+                    self.consume();
+                    return Ok(list);
+                }
+                _ => list.push(self.parse_expr()?),
+            }
+        }
+
+        todo!("unclosed par")
+    }
+
+    pub fn parse(mut self) -> Result<Program> {
+        let mut program = Vec::new();
+
+        while self.peek().is_some() {
+            program.push(self.parse_expr()?)
+        }
+
+        Ok(Program(program))
+    }
+}
-- 
cgit v1.3