Last active
August 22, 2022 11:38
-
-
Save aripiprazole/e298ae41d5f8fbf6fc199ddc5eb2421d to your computer and use it in GitHub Desktop.
Simple programming language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Simple programming language
// Copyright (C) 2022 Gabrielle
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use std::fmt::Display; | |
use chumsky::prelude::*; | |
/// Range of positions in the source text that a token or identifier covers.
pub type Span = std::ops::Range<usize>;

/// An identifier together with the place it was written.
///
/// Equality and hashing take `loc` into account, so two identifiers with the
/// same text at different positions compare as different values.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Ident {
    /// The identifier exactly as it appears in the source.
    pub text: String,
    /// Where the identifier appears in the source.
    pub loc: Span,
}

/// A lexical token.
///
/// `Clone`, `Eq` and `Hash` are derived so the type can be used as the input
/// token of a chumsky parser (`Simple<Tok>` requires `Hash + Eq`, and most
/// combinators require `Clone`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Tok {
    /// The `let` keyword.
    Let,
    /// The `fn` keyword.
    Fn,
    /// The `class` keyword.
    Class,
    /// The `trait` keyword.
    Trait,
    /// `@`
    At,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBracket,
    /// `}`
    RBracket,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Bar,
    /// `:`
    Colon,
    /// `;`
    Semi,
    /// `=>`
    Arrow,
    /// `_`
    Underscore,
    /// `.`
    Dot,
    /// A string literal; holds the text between the quotes.
    Str(String),
    /// A numeric literal, kept as the text that was written.
    Num(String),
    /// Any identifier that is not a keyword.
    Ident(Ident),
}
impl Display for Tok { | |
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
match self { | |
Tok::Let => write!(f, "t:let"), | |
Tok::Fn => write!(f, "t:fn"), | |
Tok::Class => write!(f, "t:class"), | |
Tok::Trait => write!(f, "t:trait"), | |
Tok::At => write!(f, "t:@"), | |
Tok::LParen => write!(f, "t:("), | |
Tok::RParen => write!(f, "t:)"), | |
Tok::LBracket => write!(f, "t:{{"), | |
Tok::RBracket => write!(f, "t:}}"), | |
Tok::Plus => write!(f, "t:+"), | |
Tok::Minus => write!(f, "t:-"), | |
Tok::Star => write!(f, "t:*"), | |
Tok::Bar => write!(f, "t:/"), | |
Tok::Colon => write!(f, "t::"), | |
Tok::Semi => write!(f, "t:;"), | |
Tok::Arrow => write!(f, "t:=>"), | |
Tok::Underscore => write!(f, "t:_"), | |
Tok::Dot => write!(f, "t:."), | |
Tok::Str(txt) => write!(f, "t:Str('{}')", txt), | |
Tok::Num(n) => write!(f, "t:Num({})", n), | |
Tok::Ident(id) => write!(f, "t:Ident({})", id.text), | |
} | |
} | |
} | |
pub fn lexer() -> impl Parser<char, Vec<(Tok, Span)>, Error = Simple<char>> { | |
let num_lexer = text::int::<_, Simple<char>>(10) | |
.chain::<char, _, _>(just('.').chain(text::digits(10)).or_not().flatten()) | |
.collect::<String>() | |
.map(Tok::Num); | |
let str_lexer = just::<_, _, Simple<char>>('"') | |
.ignore_then(filter(|c| *c != '"').repeated()) | |
.then_ignore(just('"')) | |
.collect::<String>() | |
.map(Tok::Str); | |
let ident_lexer = | |
text::ident::<_, Simple<char>>().map_with_span(|ident, span| match ident.as_str() { | |
"fn" => Tok::Fn, | |
"class" => Tok::Class, | |
"let" => Tok::Let, | |
"trait" => Tok::Trait, | |
ident => Tok::Ident(Ident { | |
text: ident.to_string(), | |
loc: span, | |
}), | |
}); | |
let tok_lexer = num_lexer | |
.or(str_lexer) | |
.or(ident_lexer) | |
.or(just::<_, _, Simple<char>>("(").map(|_| Tok::LParen)) | |
.or(just::<_, _, Simple<char>>(")").map(|_| Tok::RParen)) | |
.or(just::<_, _, Simple<char>>("{").map(|_| Tok::LBracket)) | |
.or(just::<_, _, Simple<char>>("}").map(|_| Tok::RBracket)) | |
.or(just::<_, _, Simple<char>>(";").map(|_| Tok::Semi)) | |
.or(just::<_, _, Simple<char>>(":").map(|_| Tok::Colon)) | |
.or(just::<_, _, Simple<char>>("@").map(|_| Tok::At)) | |
.or(just::<_, _, Simple<char>>("+").map(|_| Tok::Plus)) | |
.or(just::<_, _, Simple<char>>("-").map(|_| Tok::Minus)) | |
.or(just::<_, _, Simple<char>>("*").map(|_| Tok::Star)) | |
.or(just::<_, _, Simple<char>>("/").map(|_| Tok::Bar)) | |
.or(just::<_, _, Simple<char>>("=>").map(|_| Tok::Arrow)) | |
.or(just::<_, _, Simple<char>>("=>").map(|_| Tok::Arrow)) | |
.recover_with(skip_then_retry_until([])); | |
let _comment_lexer = just::<_, _, Simple<char>>("//") | |
.then(take_until(just("\n"))) | |
.padded(); | |
tok_lexer | |
// .padded_by(comment_lexer) | |
.padded() | |
.map_with_span(|a, b| (a, b)) | |
.repeated() | |
} | |
#[cfg(test)]
mod tests {
    use chumsky::Parser;

    use super::lexer;

    /// Lexes a small annotated class header and checks the produced tokens.
    ///
    /// Tokens are compared through their `Display` rendering so the check
    /// does not depend on which comparison traits `Tok` implements.
    #[test]
    fn test_lexer() {
        let (tokens, _errs) = lexer().parse_recovery("@data class Person");
        let rendered: Vec<String> = tokens
            .expect("lexer should produce a token stream")
            .iter()
            .map(|(tok, _span)| tok.to_string())
            .collect();

        assert_eq!(
            rendered,
            ["t:@", "t:Ident(data)", "t:class", "t:Ident(Person)"]
        );
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Simple programming language
// Copyright (C) 2022 Gabrielle
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use ariadne::Source; | |
use chumsky::Parser; | |
use parsing::{exp_parser, Exp}; | |
use reporting::build_reason; | |
pub mod lexing; | |
pub mod parsing; | |
pub mod reporting; | |
type SumFunc = unsafe extern "C" fn(u64, u64, u64) -> u64; | |
struct Codegen<'a> { | |
context: &'a inkwell::context::Context, | |
module: inkwell::module::Module<'a>, | |
builder: inkwell::builder::Builder<'a>, | |
ee: inkwell::execution_engine::ExecutionEngine<'a>, | |
} | |
impl<'a> Codegen<'a> { | |
fn jit_compile_sum(&self) -> Option<inkwell::execution_engine::JitFunction<SumFunc>> { | |
let i64_type = self.context.i64_type(); | |
let fn_type = i64_type.fn_type(&[i64_type.into(), i64_type.into(), i64_type.into()], false); | |
let function = self.module.add_function("sum", fn_type, None); | |
let basic_block = self.context.append_basic_block(function, "entry"); | |
self.builder.position_at_end(basic_block); | |
let x = function.get_nth_param(0)?.into_int_value(); | |
let y = function.get_nth_param(1)?.into_int_value(); | |
let z = function.get_nth_param(2)?.into_int_value(); | |
let sum = self.builder.build_int_add(x, y, "sum"); | |
let sum = self.builder.build_int_add(sum, z, "sum"); | |
self.builder.build_return(Some(&sum)); | |
unsafe { self.ee.get_function("sum").ok() } | |
} | |
} | |
fn main() { | |
let context = inkwell::context::Context::create(); | |
let module = context.create_module("sum"); | |
let ee = module | |
.create_jit_execution_engine(inkwell::OptimizationLevel::None) | |
.expect("Failed to create JIT execution engine"); | |
let codegen = Codegen { | |
context: &context, | |
module, | |
builder: context.create_builder(), | |
ee, | |
}; | |
let sum = codegen | |
.jit_compile_sum() | |
.ok_or("Unable to JIT compile `sum`") | |
.expect("Unable to JIT compile `sum`"); | |
println!("{}", unsafe { sum.call(10, 10, 10) }); | |
println!("{:?}", parse_exp("1+1")); | |
} | |
pub fn parse_exp(src: &str) -> Option<Exp> { | |
let (exp, errs) = exp_parser().parse_recovery(src); | |
errs.into_iter().for_each(|err| { | |
build_reason(err) | |
.finish() | |
.print(Source::from(&src)) | |
.unwrap(); | |
}); | |
exp | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Simple programming language
// Copyright (C) 2022 Gabrielle
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use chumsky::prelude::*; | |
/// A literal value.
///
/// `PartialEq` (not `Eq`) because of the `f32` payload.
#[derive(Debug, Clone, PartialEq)]
pub enum Lit {
    /// Integer literal.
    Int(i32),
    /// Floating-point literal.
    Float(f32),
    /// String literal.
    Str(String),
    /// Character literal.
    Char(char),
}

/// Prefix operators.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Prefix `+`.
    Pos,
    /// Prefix `-`.
    Neg,
}

/// Comparison operators.
///
/// `exp_parser` does not construct comparisons at the moment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CmpOp {
    Eq,
    Neq,
    Gt,
    Gte,
    Lt,
    Lte,
}

/// Arithmetic infix operators.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinOp {
    /// Infix `-`.
    Sub,
    /// Infix `+`.
    Sum,
    /// Infix `/`.
    Div,
    /// Infix `*`.
    Mul,
}

/// An expression tree.
///
/// Comparable with `==` so parser output can be checked against an expected
/// tree in tests.
#[derive(Debug, Clone, PartialEq)]
pub enum Exp {
    /// A literal value.
    Lit(Lit),
    /// A prefix operator applied to an operand.
    Unary(UnaryOp, Box<Exp>),
    /// A comparison: operator, left operand, right operand.
    Cmp(CmpOp, Box<Exp>, Box<Exp>),
    /// An arithmetic operation: operator, left operand, right operand.
    Bin(BinOp, Box<Exp>, Box<Exp>),
}
impl std::fmt::Display for Lit { | |
fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result { | |
todo!() | |
} | |
} | |
impl std::fmt::Display for Exp { | |
fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result { | |
todo!() | |
} | |
} | |
pub fn exp_parser() -> impl Parser<char, Exp, Error = Simple<char>> { | |
let int_parser = text::int(10) | |
.chain::<char, _, _>(just('.').chain(text::digits(10)).or_not().flatten()) | |
.collect::<String>() | |
.map(|x| match x { | |
x if x.contains(".") => Lit::Float(x.parse::<f32>().unwrap()), | |
x => Lit::Int(x.parse::<i32>().unwrap()), | |
}) | |
.map(Exp::Lit); | |
let call_parser = int_parser; | |
let unary_parser = recursive(|unary_parser| { | |
let op = choice(( | |
just('+').map(|_| UnaryOp::Pos), | |
just('-').map(|_| UnaryOp::Neg), | |
)); | |
choice(( | |
op.then(unary_parser) | |
.map(|(op, exp)| Exp::Unary(op, Box::new(exp))), | |
call_parser, | |
)) | |
}); | |
let factor_parser = { | |
let op = choice((just('/').map(|_| BinOp::Div), just('*').map(|_| BinOp::Mul))); | |
unary_parser | |
.clone() | |
.then(op.then(unary_parser.clone()).repeated()) | |
.foldl(|acc, (op, next)| Exp::Bin(op, Box::new(acc), Box::new(next))) | |
}; | |
let term_parser = { | |
let op = choice((just('+').map(|_| BinOp::Sum), just('-').map(|_| BinOp::Sub))); | |
factor_parser | |
.clone() | |
.then(op.then(factor_parser.clone()).repeated()) | |
.foldl(|acc, (op, next)| Exp::Bin(op, Box::new(acc), Box::new(next))) | |
}; | |
term_parser | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Simple programming language
// Copyright (C) 2022 Gabrielle
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use ariadne::{Color, Fmt, Label, Report, ReportBuilder}; | |
use chumsky::prelude::Simple; | |
pub fn build_reason(err: Simple<char>) -> ReportBuilder<std::ops::Range<usize>> { | |
let report = Report::build(ariadne::ReportKind::Error, (), err.span().start); | |
match err.reason() { | |
chumsky::error::SimpleReason::Unclosed { span, delimiter } => report | |
.with_message(format!( | |
"Unclosed delimiter {}", | |
delimiter.fg(Color::Yellow) | |
)) | |
.with_label( | |
Label::new(span.clone()) | |
.with_message(format!( | |
"Unclosed delimiter {}", | |
delimiter.fg(Color::Yellow) | |
)) | |
.with_color(Color::Yellow), | |
) | |
.with_label( | |
Label::new(err.span()) | |
.with_message(format!( | |
"Must be closed before this {}", | |
err.found() | |
.map(|x| x.to_string()) | |
.unwrap_or("end of file".to_string()) | |
.fg(Color::Red) | |
)) | |
.with_color(Color::Red), | |
), | |
chumsky::error::SimpleReason::Unexpected => report | |
.with_message(format!( | |
"{}, expected {}", | |
if err.found().is_some() { | |
"Unexpected token in input" | |
} else { | |
"Unexpected end of input" | |
}, | |
if err.expected().len() == 0 { | |
"something else".to_string() | |
} else { | |
err.expected() | |
.map(|expected| match expected { | |
Some(expected) => expected.to_string(), | |
None => "end of input".to_string(), | |
}) | |
.collect::<Vec<_>>() | |
.join(", ") | |
} | |
)) | |
.with_label( | |
Label::new(err.span()) | |
.with_message(format!( | |
"Unexpected token {}", | |
err.found() | |
.map(|x| x.to_string()) | |
.unwrap_or("end of file".into()) | |
.fg(Color::Red) | |
)) | |
.with_color(Color::Red), | |
), | |
chumsky::error::SimpleReason::Custom(msg) => report.with_message(msg).with_label( | |
Label::new(err.span()) | |
.with_message(format!("{}", msg.fg(Color::Red))) | |
.with_color(Color::Red), | |
), | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment