Skip to content

Instantly share code, notes, and snippets.

@mystor
Created January 25, 2015 21:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mystor/aa6a79878f2de033c27d to your computer and use it in GitHub Desktop.
Save mystor/aa6a79878f2de033c27d to your computer and use it in GitHub Desktop.
Ducky Preprocessor
/target
/Cargo.lock
[package]
name = "dpp"
version = "0.0.1"
authors = ["Michael Layzell <michael@thelayzells.com>"]
[dependencies.duckyc_syntax]
# I'll push the duckyc_syntax crate up to github at some point
# (It's only been recently extracted from duckyc)
# For now, I'll just use a path (I'll update the gist when the crate has been pushed)
path = "../duckyc_syntax"
[[bin]]
name = "dpp"
# This is a hack so that the entire Cargo project can be put in a gist.
# gists cannot contain directories, so the source has to be in the same
# directory as this file.
path = "main.rs"
#![allow(unstable)]
extern crate duckyc_syntax;
use duckyc_syntax::lexer::{self, Token};
use duckyc_syntax::lexer::Token::*;
use duckyc_syntax::parser::State;
use duckyc_syntax::intern::Atom;
// In dpp, we want to have directives. Directives control the way that code executes
// I'm mimicing Rust here, and want to use #[] for directives. So, for example,
// #[if x]
// /* some tokens here */
// #[else]
// /* some tokens here */
// #[endif]
// would check if x was defined, and if it was include the following block.
// You can also write #[define x y] to define x to y, and just #x to inline the value of x at that location.
#[derive(Clone, Show)]
enum Hunk {
// Just a direct token dump -> no preprocessing to be done.
Tok(Token),
// Notably absent:
// Include(Atom) -> IO in Rust is super unstable right now, so I'm avoiding dealing with it
// Macro(Atom, Vec<Atom>, Vec<Hunk>) -> Macros which accept arguments (I may add this to the parser later)
Define(Atom, Vec<Hunk>), // #[define a ...], binds a to the ...
Expand(Atom), // #a => maps to whatever a is bound to
If(Atom, Vec<Hunk>, Vec<Hunk>), // #[if x] ... #[else] ... #[endif]
// These hunks are technically invalid, and can't be used for anything
// They are used as values to be passed around while parsing the structure
Else,
EndIf
}
// Some predicates which are used to determine when to stop parsing directives
fn until_rbracket_pred(hunk: &Hunk) -> bool {
match *hunk {
Hunk::Tok(RBRACKET) => true,
_ => false
}
}
fn until_endif_pred(hunk: &Hunk) -> bool {
match *hunk {
Hunk::EndIf => true,
_ => false
}
}
fn until_else_endif_pred(hunk: &Hunk) -> bool {
match *hunk {
Hunk::EndIf => true,
Hunk::Else => true,
_ => false
}
}
fn never_pred(_: &Hunk) -> bool { false }
fn parse_directive(state: &mut State, ty: &Atom, body: Vec<Hunk>) -> Result<Hunk, String> {
match ty.as_slice() {
"if" => {
if let Hunk::Tok(IDENT(ref condition)) = body[0] {
let mut cons = try!(parse_groups(state, until_else_endif_pred));
match cons.pop() {
Some(Hunk::Else) => {
let mut alt = try!(parse_groups(state, until_endif_pred));
if let Some(Hunk::EndIf) = alt.pop() {
Ok(Hunk::If(condition.clone(), cons, alt))
} else {
Err(format!("Must terminate else block with #[endif]"))
}
}
Some(Hunk::EndIf) => {
Ok(Hunk::If(condition.clone(), cons, vec![]))
}
_ => {
Err(format!("Unexpected end of file while parsing if"))
}
}
} else {
Err(format!("Expected condition for if"))
}
}
"else" => {
if body.is_empty() {
Ok(Hunk::Else)
} else {
Err(format!("Else directive takes no paramaters"))
}
}
"endif" => {
if body.is_empty() {
Ok(Hunk::EndIf)
} else {
Err(format!("EndIf directive takes no paramaters"))
}
}
"define" => {
if let Hunk::Tok(IDENT(ref atom)) = body[0] {
Ok(Hunk::Define(atom.clone(), body[1..].iter().cloned().collect()))
} else {
Err(format!("Can only #[define] idents!"))
}
}
_ => Err(format!("Unrecognized directive: {:?}", ty))
}
}
fn parse_group(state: &mut State) -> Result<Hunk, String> {
match state.eat() {
Some(&POUND) => {
match state.eat() {
Some(&LBRACKET) => {
// Require the next token to be an ident
if let Some(&IDENT(ref atom)) = state.eat() {
let mut hunks = try!(parse_groups(state, until_rbracket_pred));
if let Some(Hunk::Tok(RBRACKET)) = hunks.pop() {
parse_directive(state, atom, hunks)
} else {
Err(format!("Expected ] after directive"))
}
} else {
Err(format!("Expected IDENT as first token in directive"))
}
},
Some(&IDENT(ref atom)) => {
Ok(Hunk::Expand(atom.clone()))
},
_ => Err(format!("Expected [ or IDENT after #"))
}
}
Some(tok) => {
Ok(Hunk::Tok(tok.clone()))
}
None => unreachable!()
}
}
fn parse_groups(state: &mut State, done_pred: fn(&Hunk) -> bool) -> Result<Vec<Hunk>, String> {
let mut hunks = Vec::new();
loop {
if let Some(_) = state.peek() {
let hunk = try!(parse_group(state));
let done = done_pred(&hunk);
hunks.push(hunk);
if done { break }
} else { break }
}
Ok(hunks)
}
fn main() {
let test_code = r#"
#[define compose_id]
let id = fn (x) { x };
#[define number 5]
let id2 = #[if compose_id] id(id) #[else] id #[endif];
let a = id2(#number);
#[define number 10]
let b = id2(#number); "#;
let lexed = lexer::lex(test_code).unwrap();
let groups = parse_groups(&mut State::new(lexed.as_slice()), never_pred).unwrap();
for group in groups.iter() {
println!("{:?}", group);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment