diff options
| author | Tolmachev Igor <me@igorek.dev> | 2026-05-09 00:34:22 +0300 |
|---|---|---|
| committer | Tolmachev Igor <me@igorek.dev> | 2026-05-09 01:25:25 +0300 |
| commit | f6983686a66c3b8941af471d78642b307eb26f8e (patch) | |
| tree | 80fd754fcc820e9b47b9d9cdafbdf86065ebf3ed /compiler/src | |
| parent | 17475ea76a2a6e81bc25a995eca0f19c727a683a (diff) | |
| download | crisp-f6983686a66c3b8941af471d78642b307eb26f8e.tar.gz crisp-f6983686a66c3b8941af471d78642b307eb26f8e.zip | |
Add AST parser skeleton
Diffstat (limited to 'compiler/src')
| -rw-r--r-- | compiler/src/ast/error.rs | 12 | ||||
| -rw-r--r-- | compiler/src/ast/mod.rs | 27 | ||||
| -rw-r--r-- | compiler/src/ast/parser.rs | 182 |
3 files changed, 221 insertions, 0 deletions
diff --git a/compiler/src/ast/error.rs b/compiler/src/ast/error.rs new file mode 100644 index 0000000..11f552d --- /dev/null +++ b/compiler/src/ast/error.rs | |||
| @@ -0,0 +1,12 @@ | |||
| 1 | use std::{error, fmt}; | ||
| 2 | |||
/// Error type reported by the AST parser.
///
/// The enum has no variants yet, so no value of this type can currently be
/// constructed.
#[derive(Debug)]
pub enum Error {}

impl fmt::Display for Error {
    /// Formats the error for display.
    ///
    /// `Error` is uninhabited, so this body can never execute. The empty match
    /// states that to the compiler (instead of a runtime `todo!()` panic) and
    /// stops compiling as soon as a variant is added, forcing a real message
    /// to be written for it.
    fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result {
        match *self {}
    }
}

impl error::Error for Error {}
diff --git a/compiler/src/ast/mod.rs b/compiler/src/ast/mod.rs new file mode 100644 index 0000000..4587ea7 --- /dev/null +++ b/compiler/src/ast/mod.rs | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | mod error; | ||
| 2 | mod parser; | ||
| 3 | |||
| 4 | use std::rc::Rc; | ||
| 5 | |||
| 6 | use crate::span::Spanned; | ||
| 7 | pub use error::Error; | ||
| 8 | pub use parser::Parser; | ||
| 9 | |||
/// A leaf value of the syntax tree.
///
/// `PartialEq` is derived so atoms can be compared directly (for example with
/// `assert_eq!`). `Eq` is intentionally not derived because `Float` holds an
/// `f64`, for which `NaN != NaN`.
#[derive(Clone, Debug, PartialEq)]
pub enum Atom {
    /// Floating-point number literal.
    Float(f64),
    /// Integer number literal.
    Integer(i64),
    /// String literal contents.
    String(Rc<str>),
    /// Symbol name.
    Symbol(Rc<str>),
    /// Boolean value.
    Bool(bool),
    /// The nil value.
    Nil,
}
| 19 | |||
/// A node of the syntax tree: either a single atom or a list of nested,
/// span-annotated expressions.
#[derive(Clone, Debug)]
pub enum Expr {
    /// A leaf value.
    Atom(Atom),
    /// A sequence of child expressions, each carrying its own span.
    List(Vec<Spanned<Expr>>),
}
| 25 | |||
/// A whole program: a sequence of span-annotated expressions.
#[derive(Clone, Debug)]
pub struct Program(pub Vec<Spanned<Expr>>);
diff --git a/compiler/src/ast/parser.rs b/compiler/src/ast/parser.rs new file mode 100644 index 0000000..171ecfe --- /dev/null +++ b/compiler/src/ast/parser.rs | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | use std::{iter::Peekable, result}; | ||
| 2 | |||
| 3 | use crate::{ | ||
| 4 | ast::{Atom, Error, Expr, Program}, | ||
| 5 | lexer::Token, | ||
| 6 | span::{Pos, Span, Spanned}, | ||
| 7 | }; | ||
| 8 | |||
/// Result alias used throughout this module: the error side is an [`Error`]
/// wrapped in [`Spanned`].
type Result<T> = result::Result<T, Spanned<Error>>;
| 10 | |||
| 11 | fn parse_number(number: &str) -> Atom { | ||
| 12 | let is_float = number.bytes().any(|b| matches!(b, b'.' | b'e' | b'E')); | ||
| 13 | |||
| 14 | if is_float { | ||
| 15 | match number.parse() { | ||
| 16 | Ok(ok) => Atom::Float(ok), | ||
| 17 | Err(err) => todo!("invalid float literal {number}: {err}"), | ||
| 18 | } | ||
| 19 | } else { | ||
| 20 | match number.parse() { | ||
| 21 | Ok(ok) => Atom::Integer(ok), | ||
| 22 | Err(err) => todo!("invalid integer literal {number}: {err}"), | ||
| 23 | } | ||
| 24 | } | ||
| 25 | } | ||
| 26 | |||
| 27 | fn parse_string(string: &str) -> Atom { | ||
| 28 | let mut result = String::new(); | ||
| 29 | let mut is_escape = false; | ||
| 30 | |||
| 31 | for ch in string.chars() { | ||
| 32 | if !is_escape { | ||
| 33 | match ch { | ||
| 34 | '\\' => is_escape = true, | ||
| 35 | _ => result.push(ch), | ||
| 36 | } | ||
| 37 | } else { | ||
| 38 | match ch { | ||
| 39 | '"' => result.push('"'), | ||
| 40 | 'n' => result.push('\n'), | ||
| 41 | '\\' => result.push('\\'), | ||
| 42 | '\n' => {} | ||
| 43 | _ => todo!("unexpected escape char {ch:?}"), | ||
| 44 | } | ||
| 45 | is_escape = false; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | if is_escape { | ||
| 50 | todo!("unclosed string"); | ||
| 51 | } | ||
| 52 | |||
| 53 | Atom::String(result.into()) | ||
| 54 | } | ||
| 55 | |||
| 56 | fn parse_symbol(symbol: &str) -> Atom { | ||
| 57 | match symbol { | ||
| 58 | "true" => Atom::Bool(true), | ||
| 59 | "false" => Atom::Bool(false), | ||
| 60 | "nil" => Atom::Nil, | ||
| 61 | _ => Atom::Symbol(symbol.into()), | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
/// Parser that turns a stream of span-annotated tokens into a [`Program`].
pub struct Parser<'a, I>
where
    I: Iterator<Item = Spanned<Token<'a>>>,
{
    /// Token stream wrapped in `Peekable` so the next token can be inspected
    /// before it is consumed.
    tokens: Peekable<I>,
    /// End position of the most recently consumed token; holds the initial
    /// position until the first token is consumed.
    cursor: Pos,
}
| 72 | |||
| 73 | impl<'a, I> Parser<'a, I> | ||
| 74 | where | ||
| 75 | I: Iterator<Item = Spanned<Token<'a>>>, | ||
| 76 | { | ||
| 77 | pub fn new(tokens: I) -> Self { | ||
| 78 | Self { | ||
| 79 | tokens: tokens.peekable(), | ||
| 80 | cursor: Pos::new(1, 0, 0), | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | fn peek(&mut self) -> Option<Spanned<Token<'a>>> { | ||
| 85 | self.tokens.peek().copied() | ||
| 86 | } | ||
| 87 | |||
| 88 | fn consume(&mut self) -> Option<Spanned<Token<'a>>> { | ||
| 89 | self.tokens.next().inspect(|s| self.cursor = s.span.end) | ||
| 90 | } | ||
| 91 | |||
| 92 | fn parse_expr(&mut self) -> Result<Spanned<Expr>> { | ||
| 93 | let Spanned { inner: token, span } = match self.peek() { | ||
| 94 | Some(spanned) => spanned, | ||
| 95 | None => todo!("unexpected eof"), | ||
| 96 | }; | ||
| 97 | |||
| 98 | let expr = match token { | ||
| 99 | Token::LeftPar => { | ||
| 100 | self.consume(); | ||
| 101 | let list = self.parse_list()?; | ||
| 102 | let expr = if !list.is_empty() { | ||
| 103 | Expr::List(list) | ||
| 104 | } else { | ||
| 105 | Expr::Atom(Atom::Nil) | ||
| 106 | }; | ||
| 107 | |||
| 108 | Spanned::new(expr, Span::new(span.start, self.cursor)) | ||
| 109 | } | ||
| 110 | Token::RightPar => todo!("unexpected par"), | ||
| 111 | Token::Quote => { | ||
| 112 | self.consume(); | ||
| 113 | let quote = Spanned::new( | ||
| 114 | Expr::Atom(Atom::Symbol("quote".into())), | ||
| 115 | Span::new(span.start, self.cursor), | ||
| 116 | ); | ||
| 117 | let expr = self.parse_expr()?; | ||
| 118 | |||
| 119 | Spanned::new( | ||
| 120 | Expr::List(vec![quote, expr]), | ||
| 121 | Span::new(span.start, self.cursor), | ||
| 122 | ) | ||
| 123 | } | ||
| 124 | Token::Number(number) => { | ||
| 125 | self.consume(); | ||
| 126 | |||
| 127 | Spanned::new( | ||
| 128 | Expr::Atom(parse_number(number)), | ||
| 129 | Span::new(span.start, self.cursor), | ||
| 130 | ) | ||
| 131 | } | ||
| 132 | Token::String(string) => { | ||
| 133 | self.consume(); | ||
| 134 | |||
| 135 | Spanned::new( | ||
| 136 | Expr::Atom(parse_string(string)), | ||
| 137 | Span::new(span.start, self.cursor), | ||
| 138 | ) | ||
| 139 | } | ||
| 140 | Token::UnclosedString(string) => { | ||
| 141 | self.consume(); | ||
| 142 | todo!("unclosed string {string:?}") | ||
| 143 | } | ||
| 144 | Token::Symbol(symbol) => { | ||
| 145 | self.consume(); | ||
| 146 | |||
| 147 | Spanned::new( | ||
| 148 | Expr::Atom(parse_symbol(symbol)), | ||
| 149 | Span::new(span.start, self.cursor), | ||
| 150 | ) | ||
| 151 | } | ||
| 152 | }; | ||
| 153 | |||
| 154 | Ok(expr) | ||
| 155 | } | ||
| 156 | |||
| 157 | fn parse_list(&mut self) -> Result<Vec<Spanned<Expr>>> { | ||
| 158 | let mut list = Vec::new(); | ||
| 159 | |||
| 160 | while let Some(Spanned { inner: token, .. }) = self.peek() { | ||
| 161 | match token { | ||
| 162 | Token::RightPar => { | ||
| 163 | self.consume(); | ||
| 164 | return Ok(list); | ||
| 165 | } | ||
| 166 | _ => list.push(self.parse_expr()?), | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | todo!("unclosed par") | ||
| 171 | } | ||
| 172 | |||
| 173 | pub fn parse(mut self) -> Result<Program> { | ||
| 174 | let mut program = Vec::new(); | ||
| 175 | |||
| 176 | while self.peek().is_some() { | ||
| 177 | program.push(self.parse_expr()?) | ||
| 178 | } | ||
| 179 | |||
| 180 | Ok(Program(program)) | ||
| 181 | } | ||
| 182 | } | ||
