Skip to content

Instantly share code, notes, and snippets.

@aripiprazole
Last active August 22, 2022 11:30
Show Gist options
  • Save aripiprazole/aa4e93b36f4cd0173a702cbdf477a025 to your computer and use it in GitHub Desktop.
Save aripiprazole/aa4e93b36f4cd0173a702cbdf477a025 to your computer and use it in GitHub Desktop.
Parser for a typed lox programming language
/*
* Rox is a simple object oriented language with non static typing
* Copyright (C) 2022 Gabrielle Guimarães de Oliveira
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
use std::fmt::Display;
/// A pair of `i32` values used as the endpoints of [`Loc::Range`].
///
/// NOTE(review): presumably a source position, but which component is the row and which
/// is the column is not established anywhere in this file — confirm before relying on it.
/// Both fields are private and no constructor is defined in this module.
#[derive(Debug, PartialEq, Eq, PartialOrd, Hash, Clone, Copy)]
pub struct Pos(i32, i32);
/// Location information attached to AST nodes.
#[derive(Debug, PartialEq, Eq, PartialOrd, Hash, Clone, Copy)]
pub enum Loc {
/// A span delimited by two [`Pos`] values (presumably start and end — TODO confirm).
Range(Pos, Pos),
/// No concrete span; this is the only variant the parser in this file produces.
Generated,
}
impl Loc {
/// Placeholder: ignores `_file` and always returns `0`.
///
/// NOTE(review): this is an associated function (no `self`) that takes the [`RoxFile`]
/// by value; it looks like an unfinished column lookup — confirm the intended signature.
pub fn col(_file: RoxFile) -> i32 {
0
}
/// Placeholder: ignores `_file` and always returns `0`.
///
/// NOTE(review): same caveats as [`Loc::col`]; presumably meant to compute a row.
pub fn row(_file: RoxFile) -> i32 {
0
}
}
/// A name together with its location: field `0` is the text, field `1` the [`Loc`].
///
/// Besides variable, function and class names, the parser also uses this type to carry
/// the operator of an [`Expr::Binary`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Hash)]
pub struct Identifier(pub String, pub Loc);
impl Identifier {
pub fn empty() -> Self {
return Self(String::new(), Loc::Generated);
}
}
impl Display for Identifier {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.0.as_str())
}
}
/// A source file: a name, a list of top-level statements and a `content` string.
///
/// NOTE(review): nothing in this file constructs a `RoxFile`; `content` is presumably
/// the raw source text — confirm against the code that builds it.
#[derive(Debug, PartialEq)]
pub struct RoxFile {
pub name: String,
pub statements: Vec<Stmt>,
pub content: String,
}
/// A function or method definition as produced by the parser's `build_function`.
#[derive(Debug, PartialEq)]
pub struct Function {
/// The function's name.
pub name: Identifier,
/// Parameter names, in declaration order.
pub parameters: Vec<Identifier>,
/// The statements of the function's body block.
pub statements: Vec<Stmt>,
/// Location of the definition (the parser currently always uses `Loc::Generated`).
pub loc: Loc,
}
/// Expression nodes of the AST. The trailing [`Loc`] of every variant is its location.
#[derive(Debug, PartialEq)]
pub enum Expr {
/// Assignment to a name: `target = value`.
Assign(Identifier, Box<Expr>, Loc),
/// Binary operation: left operand, operator (carried as an [`Identifier`]), right operand.
Binary(Box<Expr>, Identifier, Box<Expr>, Loc),
/// Call: callee expression and its arguments.
Call(Box<Expr>, Vec<Expr>, Loc),
/// Member access: receiver expression and member name (`receiver.name`).
Get(Box<Expr>, Identifier, Loc),
/// NOTE(review): presumably member assignment (`receiver.name = value`); the parser in
/// this file never constructs it — confirm the field meaning.
Set(Box<Expr>, Identifier, Box<Expr>, Loc),
/// Parenthesised expression.
Group(Box<Expr>, Loc),
/// The literal `false`.
False(Loc),
/// The literal `true`.
True(Loc),
/// The literal `nil`.
Nil(Loc),
/// Integer literal, parsed into an `i32`.
Integer(i32, Loc),
/// Decimal literal, parsed into an `f32`.
Decimal(f32, Loc),
/// String literal with the surrounding quotes removed.
String(String, Loc),
/// The `super` keyword.
Super(Loc),
/// The `this` keyword.
This(Loc), //
/// Reference to a name (a bare identifier used as an expression).
Access(Identifier, Loc),
}
/// The first clause of a `for` statement (the grammar's `for_var_decl`).
#[derive(Debug, PartialEq)]
pub enum ForVarDecl {
/// A `var` declaration: name and optional initial value.
Var(Identifier, Option<Expr>, Loc),
/// An expression statement.
Expr(Expr, Loc),
}
/// Statement and declaration nodes of the AST. The trailing [`Loc`] of every variant is
/// its location.
#[derive(Debug, PartialEq)]
pub enum Stmt {
/// `{ ... }`: the contained declarations and statements.
Block(Vec<Stmt>, Loc),
/// Class declaration: name, optional superclass name, methods.
Class(Identifier, Option<Identifier>, Vec<Function>, Loc),
/// Expression statement (`expr;`).
Expr(Expr, Loc),
/// Function declaration (`fun ...`).
Fun(Function, Loc),
/// `if`: condition, then-branch, optional else-branch.
If(Expr, Box<Stmt>, Box<Option<Stmt>>, Loc),
/// `print expr;`
Print(Expr, Loc),
/// `return;` or `return expr;`
Return(Option<Expr>, Loc),
/// Variable declaration: name and optional initial value.
Var(Identifier, Option<Expr>, Loc),
/// `for`: first clause, two optional expressions (in the grammar's order; presumably
/// condition then increment — TODO confirm), body.
For(ForVarDecl, Option<Expr>, Option<Expr>, Box<Stmt>, Loc),
/// `while`: condition and body.
While(Expr, Box<Stmt>, Loc),
}
/*
* Rox is a simple object oriented language with non static typing
* Copyright (C) 2022 Gabrielle Guimarães de Oliveira
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
pub mod ast;
pub mod parser;
extern crate pest;
#[macro_use]
extern crate pest_derive;
/// Program entry point. The body is currently empty, so running the binary does nothing.
fn main() {
}
/*
* Rox is a simple object oriented language with non static typing
* Copyright (C) 2022 Gabrielle Guimarães de Oliveira
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
use crate::ast::{Expr, ForVarDecl, Function, Identifier, Loc, Stmt};
use pest::iterators::Pair;
use pest::Parser;
use substring::Substring;
/// Parser type whose implementation (and the accompanying `Rule` enum used throughout
/// this module) is derived from the grammar file `rox.pest`.
#[derive(Parser)]
#[grammar = "rox.pest"]
pub struct RoxParser;
/// Turns a pair into an [`Identifier`] holding the pair's exact source text.
///
/// The rule of the pair is not inspected, and the location is always `Loc::Generated`.
pub fn build_identifier(pair: Pair<Rule>) -> Identifier {
    let name = String::from(pair.as_str());
    Identifier(name, Loc::Generated)
}
pub fn build_function(pair: Pair<Rule>) -> Function {
let mut elements = pair.into_inner();
Function {
name: build_identifier(elements.next().unwrap()),
parameters: elements
.next()
.unwrap()
.into_inner()
.map(build_identifier)
.collect(),
statements: elements
.next()
.unwrap()
.into_inner()
.map(build_stmt)
.collect(),
loc: Loc::Generated,
}
}
/// Builds a [`Stmt`] from a declaration or statement pair.
///
/// Handles `class_decl`, `fun_decl`, `var_decl`, `expr_stmt`, `for_stmt`, `if_stmt`,
/// `while_stmt`, `print_stmt`, `return_stmt` and `block`. Every node is given
/// `Loc::Generated`.
///
/// # Panics
///
/// Panics on a pair of any other rule, or when a pair does not contain the inner pairs
/// its rule is expected to produce.
pub fn build_stmt(pair: Pair<Rule>) -> Stmt {
    match pair.as_rule() {
        Rule::class_decl => {
            let mut elements = pair.into_inner();
            let name = build_identifier(elements.next().unwrap());
            // The superclass is optional, so the pair following the name may already be
            // the first method. Dispatch on the rule rather than on the position.
            let mut extends = None;
            let mut functions = Vec::new();
            for element in elements {
                match element.as_rule() {
                    Rule::ident => extends = Some(build_identifier(element)),
                    Rule::function => functions.push(build_function(element)),
                    unknown_member => panic!("Unknown class member: {:?}", unknown_member),
                }
            }
            Stmt::Class(name, extends, functions, Loc::Generated)
        }
        Rule::fun_decl => Stmt::Fun(
            build_function(pair.into_inner().next().unwrap()),
            Loc::Generated,
        ),
        Rule::var_decl => {
            let mut elements = pair.into_inner();
            Stmt::Var(
                build_identifier(elements.next().unwrap()),
                elements.next().map(build_expr),
                Loc::Generated,
            )
        }
        Rule::expr_stmt => Stmt::Expr(
            build_expr(pair.into_inner().next().unwrap()),
            Loc::Generated,
        ),
        Rule::for_stmt => {
            // NOTE(review): the grammar lets the first clause be a bare `;` and makes both
            // following expressions optional, but the pairs are read positionally here, so
            // only the fully spelled-out form `for (init; cond; step;) body` is handled.
            // Representing an absent first clause would need a change to `ForVarDecl`.
            let mut elements = pair.into_inner();
            let for_var_decl_pair = elements.next().unwrap();
            let for_var_decl = match for_var_decl_pair.as_rule() {
                Rule::var_decl => {
                    let mut elements = for_var_decl_pair.into_inner();
                    ForVarDecl::Var(
                        build_identifier(elements.next().unwrap()),
                        elements.next().map(build_expr),
                        Loc::Generated,
                    )
                }
                Rule::expr_stmt => ForVarDecl::Expr(
                    build_expr(for_var_decl_pair.into_inner().next().unwrap()),
                    Loc::Generated,
                ),
                unknown_for_var_decl => panic!("Unknown for var decl: {:?}", unknown_for_var_decl),
            };
            Stmt::For(
                for_var_decl,
                elements.next().map(build_expr),
                elements.next().map(build_expr),
                Box::new(build_stmt(elements.next().unwrap())),
                Loc::Generated,
            )
        }
        Rule::if_stmt => {
            let mut elements = pair.into_inner();
            Stmt::If(
                build_expr(elements.next().unwrap()),
                Box::new(build_stmt(elements.next().unwrap())),
                Box::new(elements.next().map(build_stmt)),
                Loc::Generated,
            )
        }
        // `while_stmt` is part of the grammar's `stmt` rule and must be handled here;
        // its inner pairs are the condition followed by the body.
        Rule::while_stmt => {
            let mut elements = pair.into_inner();
            Stmt::While(
                build_expr(elements.next().unwrap()),
                Box::new(build_stmt(elements.next().unwrap())),
                Loc::Generated,
            )
        }
        Rule::print_stmt => Stmt::Print(
            build_expr(pair.into_inner().next().unwrap()),
            Loc::Generated,
        ),
        Rule::return_stmt => Stmt::Return(pair.into_inner().next().map(build_expr), Loc::Generated),
        Rule::block => Stmt::Block(pair.into_inner().map(build_stmt).collect(), Loc::Generated),
        unknown_stmt => panic!("Unknown stmt: {:?}", unknown_stmt),
    }
}
pub fn build_expr(pair: Pair<Rule>) -> Expr {
match pair.as_rule() {
Rule::integer => Expr::Integer(pair.as_str().to_string().parse().unwrap(), Loc::Generated),
Rule::decimal => Expr::Decimal(pair.as_str().to_string().parse().unwrap(), Loc::Generated),
Rule::ident => Expr::Access(build_identifier(pair), Loc::Generated),
Rule::v_true => Expr::True(Loc::Generated),
Rule::v_false => Expr::False(Loc::Generated),
Rule::v_nil => Expr::Nil(Loc::Generated),
Rule::v_this => Expr::This(Loc::Generated),
Rule::v_super => Expr::Super(Loc::Generated),
Rule::group => Expr::Group(
Box::new(build_expr(pair.into_inner().next().unwrap())),
Loc::Generated,
),
Rule::string => {
let s = pair.as_str();
Expr::String(s.substring(1, s.len() - 1).to_string(), Loc::Generated)
}
Rule::assign => {
let mut elements = pair.into_inner();
let name = elements.next().unwrap();
let value = elements.next().unwrap();
Expr::Assign(
build_identifier(name),
Box::new(build_expr(value)),
Loc::Generated,
)
}
Rule::logical_or
| Rule::logical_and
| Rule::equality
| Rule::comparison
| Rule::term
| Rule::factor
| Rule::unary => {
let mut elements = pair.into_inner();
let left = elements.next().unwrap();
elements.skip(1).map(|pair| pair.into_inner()).fold(
build_expr(left),
|acc, mut pairs| {
let operator = build_identifier(pairs.next().unwrap());
let right = build_expr(pairs.next().unwrap());
Expr::Binary(Box::new(acc), operator, Box::new(right), Loc::Generated)
},
)
}
Rule::call => {
let mut elements = pair.into_inner();
let callee = elements.next().unwrap();
elements.fold(build_expr(callee), |acc, pair| match pair.as_rule() {
Rule::call_function => {
let arguments = pair
.into_inner()
.next()
.unwrap()
.into_inner()
.map(build_expr)
.collect();
Expr::Call(Box::new(acc), arguments, Loc::Generated)
}
Rule::call_get => Expr::Get(
Box::new(acc),
build_identifier(pair.into_inner().next().unwrap()),
Loc::Generated,
),
unknown_call => panic!("Unknown call: {:?}", unknown_call),
})
}
unknown_expr => panic!("Unknown expr: {:?}", unknown_expr),
}
}
/// Parses `input` starting at `rule` and returns the first pair of the result.
///
/// # Panics
///
/// Panics with the parser's error message when `input` does not match `rule`, and
/// panics when the successful parse yields no pair.
pub fn parse_unwrapped(rule: Rule, input: &str) -> Pair<Rule> {
    let mut pairs = match RoxParser::parse(rule, input) {
        Ok(pairs) => pairs,
        Err(err) => panic!("{}", err),
    };
    pairs.next().unwrap()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Identifier with a generated location.
    fn ident(name: &str) -> Identifier {
        Identifier(name.to_string(), Loc::Generated)
    }

    /// Integer literal expression with a generated location.
    fn int(value: i32) -> Expr {
        Expr::Integer(value, Loc::Generated)
    }

    /// Name reference expression with generated locations.
    fn access(name: &str) -> Expr {
        Expr::Access(ident(name), Loc::Generated)
    }

    /// Expression statement wrapping an integer literal.
    fn int_stmt(value: i32) -> Stmt {
        Stmt::Expr(int(value), Loc::Generated)
    }

    /// Parses `source` as a declaration and builds its statement.
    fn stmt(source: &str) -> Stmt {
        build_stmt(parse_unwrapped(Rule::decl, source))
    }

    /// Parses `source` as an expression and builds it.
    fn expr(source: &str) -> Expr {
        build_expr(parse_unwrapped(Rule::expr, source))
    }

    #[test]
    fn test_parse_class_decl() {
        let expected = Stmt::Class(ident("Foo"), None, vec![], Loc::Generated);
        assert_eq!(stmt("class Foo {}"), expected);
    }

    #[test]
    fn test_parse_class_extends_decl() {
        let expected = Stmt::Class(ident("Foo"), Some(ident("Bar")), vec![], Loc::Generated);
        assert_eq!(stmt("class Foo < Bar {}"), expected);
    }

    #[test]
    fn test_parse_class_extends_with_functions_decl() {
        let constructor = Function {
            name: ident("constructor"),
            parameters: vec![ident("a"), ident("b")],
            statements: vec![],
            loc: Loc::Generated,
        };
        let expected = Stmt::Class(
            ident("Foo"),
            Some(ident("Bar")),
            vec![constructor],
            Loc::Generated,
        );
        assert_eq!(stmt("class Foo < Bar { constructor(a, b) {} }"), expected);
    }

    #[test]
    fn test_parse_fun_decl() {
        let println = Function {
            name: ident("println"),
            parameters: vec![ident("message")],
            statements: vec![],
            loc: Loc::Generated,
        };
        assert_eq!(
            stmt("fun println(message) {}"),
            Stmt::Fun(println, Loc::Generated)
        );
    }

    #[test]
    fn test_parse_var_decl() {
        assert_eq!(stmt("var a;"), Stmt::Var(ident("a"), None, Loc::Generated));
    }

    #[test]
    fn test_parse_var_with_value_decl() {
        assert_eq!(
            stmt("var a = 10;"),
            Stmt::Var(ident("a"), Some(int(10)), Loc::Generated)
        );
    }

    #[test]
    fn test_parse_expr_stmt() {
        assert_eq!(stmt("10;"), int_stmt(10));
    }

    #[test]
    fn test_parse_for_stmt() {
        let expected = Stmt::For(
            ForVarDecl::Expr(int(10), Loc::Generated),
            Some(int(10)),
            Some(int(10)),
            Box::new(int_stmt(10)),
            Loc::Generated,
        );
        assert_eq!(stmt("for(10; 10; 10;) 10;"), expected);
    }

    #[test]
    fn test_parse_for_var_stmt() {
        let expected = Stmt::For(
            ForVarDecl::Var(ident("a"), Some(int(10)), Loc::Generated),
            Some(int(10)),
            Some(int(10)),
            Box::new(int_stmt(10)),
            Loc::Generated,
        );
        assert_eq!(stmt("for(var a = 10; 10; 10;) 10;"), expected);
    }

    #[test]
    fn test_parse_if_stmt() {
        let expected = Stmt::If(
            int(10),
            Box::new(int_stmt(10)),
            Box::new(None),
            Loc::Generated,
        );
        assert_eq!(stmt("if (10) 10;"), expected);
    }

    #[test]
    fn test_parse_if_else_stmt() {
        let expected = Stmt::If(
            int(10),
            Box::new(int_stmt(10)),
            Box::new(Some(int_stmt(10))),
            Loc::Generated,
        );
        assert_eq!(stmt("if (10) 10; else 10;"), expected);
    }

    #[test]
    fn test_parse_print_stmt() {
        assert_eq!(stmt("print 10;"), Stmt::Print(int(10), Loc::Generated));
    }

    #[test]
    fn test_parse_return_stmt() {
        assert_eq!(stmt("return;"), Stmt::Return(None, Loc::Generated));
    }

    #[test]
    fn test_parse_return_expr_stmt() {
        assert_eq!(
            stmt("return 10;"),
            Stmt::Return(Some(int(10)), Loc::Generated)
        );
    }

    #[test]
    fn test_parse_block_stmt() {
        assert_eq!(
            stmt("{ 10; }"),
            Stmt::Block(vec![int_stmt(10)], Loc::Generated)
        );
    }

    #[test]
    fn test_parse_get() {
        assert_eq!(
            expr("a.b"),
            Expr::Get(Box::new(access("a")), ident("b"), Loc::Generated)
        );
    }

    #[test]
    fn test_parse_call() {
        assert_eq!(
            expr("a(10)"),
            Expr::Call(Box::new(access("a")), vec![int(10)], Loc::Generated)
        );
    }

    #[test]
    fn test_parse_assign_recursive() {
        let inner = Expr::Assign(ident("b"), Box::new(int(10)), Loc::Generated);
        assert_eq!(
            expr("a = b = 10"),
            Expr::Assign(ident("a"), Box::new(inner), Loc::Generated)
        );
    }

    #[test]
    fn test_parse_assign() {
        assert_eq!(
            expr("a = 10"),
            Expr::Assign(ident("a"), Box::new(int(10)), Loc::Generated)
        );
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(
            expr("'hello'"),
            Expr::String("hello".to_string(), Loc::Generated)
        );
    }

    #[test]
    fn test_parse_integer() {
        assert_eq!(expr("10"), int(10));
    }

    #[test]
    fn test_parse_decimal() {
        assert_eq!(expr("10.0"), Expr::Decimal(10.0, Loc::Generated));
    }

    #[test]
    fn test_parse_access() {
        assert_eq!(expr("hello_world"), access("hello_world"));
    }

    #[test]
    fn test_parse_true() {
        assert_eq!(expr("true"), Expr::True(Loc::Generated));
    }

    #[test]
    fn test_parse_false() {
        assert_eq!(expr("false"), Expr::False(Loc::Generated));
    }

    #[test]
    fn test_parse_nil() {
        assert_eq!(expr("nil"), Expr::Nil(Loc::Generated));
    }

    #[test]
    fn test_parse_group() {
        assert_eq!(
            expr("(nil)"),
            Expr::Group(Box::new(Expr::Nil(Loc::Generated)), Loc::Generated)
        );
    }

    #[test]
    fn test_parse_this() {
        assert_eq!(expr("this"), Expr::This(Loc::Generated));
    }

    #[test]
    fn test_parse_super() {
        assert_eq!(expr("super"), Expr::Super(Loc::Generated));
    }
}
// Grammar for the Rox language.
//
// Whitespace is skipped implicitly between the tokens of every non-atomic rule.
// "\r" is included so that sources with Windows line endings parse as well.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }

// Function
function   = { ident ~ "(" ~ parameters* ~ ")" ~ block }
parameters = { ident ~ ( "," ~ ident )* }

// Declarations
decl       = _{ class_decl | fun_decl | var_decl | stmt }
class_decl = { "class" ~ ident ~ ( "<" ~ ident )? ~ "{" ~ function* ~ "}" }
fun_decl   = { "fun" ~ function }
var_decl   = { "var" ~ ident ~ ( "=" ~ expr )? ~ ";" }

// Statements
stmt         = _{ for_stmt | if_stmt | print_stmt | return_stmt | while_stmt | block | expr_stmt }
for_var_decl = _{ var_decl | expr_stmt }
block        = { "{" ~ decl* ~ "}" }
expr_stmt    = { expr ~ ";" }
// Note: the increment clause is followed by its own ";" before the closing ")".
for_stmt     = { "for" ~ "(" ~ (for_var_decl | ";") ~ expr? ~ ";" ~ expr? ~ ";" ~ ")" ~ stmt }
print_stmt   = { "print" ~ expr ~ ";" }
return_stmt  = { "return" ~ expr? ~ ";" }
if_stmt      = { "if" ~ "(" ~ expr ~ ")" ~ stmt ~ ( "else" ~ stmt )? }
while_stmt   = { "while" ~ "(" ~ expr ~ ")" ~ stmt }

// Expression (lowest precedence first)
expr        = _{ assign | logical_or }
assign      = { ident ~ "=" ~ expr }
logical_or  = { logical_and ~ ( "or" ~ logical_and )* }
logical_and = { equality ~ ( "and" ~ equality )* }
equality    = { comparison ~ ( ( "!=" | "==" ) ~ comparison )* }
// Choice is ordered: the two-character operators must come before their one-character
// prefixes, otherwise ">" always wins and ">=" can never match (likewise "<" and "<=").
comparison  = { term ~ ( ( ">=" | ">" | "<=" | "<" ) ~ term )* }
term        = { factor ~ ( ( "-" | "+" ) ~ factor )* }
// The right operand is `unary`, not `term`: using `term` made "1 * 2 + 3" group as
// "1 * (2 + 3)" and made "*" and "/" right-associative.
factor      = { unary ~ ( ( "*" | "/" ) ~ unary )* }
unary       = { ( "!" | "-" ) ~ unary | call }
call        = { primary ~ ( call_function | call_get )* }

call_function = { "(" ~ arguments? ~ ")" }
call_get      = { "." ~ ident }
arguments     = _{ expr ~ ( "," ~ expr )* }

primary = _{
    string
  | decimal
  | integer
  | v_this
  | v_super
  | v_true
  | v_false
  | v_nil
  | ident
  | group
}

group   = { "(" ~ expr ~ ")" }
// A quote inside a string is written as two consecutive quotes.
string  = @{ "'" ~ ( "''" | (!"'" ~ ANY) )* ~ "'" }
integer = @{ ASCII_DIGIT+ }
decimal = @{ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
ident   = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }

// Keyword literals. The negative lookahead stops a keyword from matching the prefix of
// a longer identifier (e.g. "nilable" or "this_one"), which would otherwise make such
// identifiers unparseable because `primary` tries the keywords before `ident`.
v_true  = @{ "true" ~ !(ASCII_ALPHANUMERIC | "_") }
v_false = @{ "false" ~ !(ASCII_ALPHANUMERIC | "_") }
v_nil   = @{ "nil" ~ !(ASCII_ALPHANUMERIC | "_") }
v_this  = @{ "this" ~ !(ASCII_ALPHANUMERIC | "_") }
v_super = @{ "super" ~ !(ASCII_ALPHANUMERIC | "_") }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment