Skip to content

Instantly share code, notes, and snippets.

@lfnoise
Last active July 18, 2022 04:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lfnoise/cd666681203df2cc53548c9659fd9b96 to your computer and use it in GitHub Desktop.
Save lfnoise/cd666681203df2cc53548c9659fd9b96 to your computer and use it in GitHub Desktop.
A parser made using rust-peg.
use std::collections::HashMap;
/// A named symbol, holding its textual name as an owned string.
#[derive(Clone, Debug)]
pub struct Symbol {
    /// The symbol's spelling.
    name: String,
}
/// Compact handle for an interned symbol name.
///
/// The wrapped `usize` is the index assigned when the name was first interned.
/// `PartialEq`/`Eq`/`Hash` are derived so that handles can be compared directly
/// and used as keys in hashed collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SymbolID(usize);
/// Interning table that maps symbol names to their `SymbolID` handles.
///
/// `Default` yields an empty table.
#[derive(Default, Debug, Clone)]
pub struct SymbolTable {
    /// Name -> handle lookup for every symbol currently in the table.
    names: HashMap<String, SymbolID>,
}
impl SymbolTable {
    /// Removes every interned name.
    ///
    /// Numbering restarts from zero afterwards, because new IDs are derived
    /// from the current number of entries.
    pub fn clear(&mut self) {
        self.names.clear();
    }

    /// Interns `s` and returns its ID.
    ///
    /// A name that is already present returns the ID it was given the first
    /// time; a new name receives the next free index (the current table size).
    ///
    /// NOTE: `gen` is a reserved keyword starting with the Rust 2024 edition;
    /// the name is kept unchanged here because callers depend on it.
    pub fn gen(&mut self, s: &str) -> SymbolID {
        // One borrowed lookup on the hit path: no second hash, no allocation.
        if let Some(&id) = self.names.get(s) {
            return id;
        }
        let id = SymbolID(self.names.len());
        self.names.insert(s.to_owned(), id);
        id
    }
}
/// The receiver at the start of a message chain.
#[derive(Clone, Debug)]
pub enum MsgHead {
    /// No explicit receiver: the chain starts from the current environment.
    CurEnvir,
    /// Integer literal.
    Int(i64),
    /// Floating-point literal.
    Float(f64),
    /// String literal (contents without the surrounding quotes).
    String(String),
    /// Quoted symbol.
    Symbol(SymbolID),
    /// Lambda: parameter names followed by the body statements.
    Lambda(Vec<SymbolID>, Vec<MsgChain>),
}
/// A single message send: a selector plus its argument expressions.
#[derive(Clone, Debug)]
pub struct Msg {
    /// Interned selector of the message.
    name: SymbolID,
    /// Argument expressions, each one a message chain of its own.
    args: Vec<MsgChain>,
}
/// An expression: a head value followed by the messages sent to it in order.
#[derive(Clone, Debug)]
pub struct MsgChain {
    /// Receiver the chain starts from.
    head: MsgHead,
    /// Messages applied to the head, first to last.
    chain: Vec<Msg>,
}
impl Default for MsgChain {
    /// An empty chain rooted at the current environment.
    fn default() -> Self {
        Self {
            head: MsgHead::CurEnvir,
            chain: Vec::new(),
        }
    }
}
impl MsgChain {
    /// Builds a chain that consists of `head` alone, with no messages yet.
    fn new(head: MsgHead) -> Self {
        Self {
            head,
            chain: Vec::new(),
        }
    }

    /// Appends one message to the end of the chain.
    fn add_msg(&mut self, m: Msg) {
        self.chain.push(m);
    }

    /// Moves every message out of `m` onto the end of the chain, leaving `m` empty.
    fn add_msgs(&mut self, m: &mut Vec<Msg>) {
        self.chain.extend(m.drain(..));
    }
}
peg::parser! {
    // PEG grammar for the message-chain language.
    //
    // Every rule that may intern a name takes the symbol table `st`. Token-like
    // rules consume the whitespace that follows them (via `_`), so the
    // higher-level rules only skip whitespace right after opening brackets.
    //
    // PEG choice (`/`) is ordered: the first alternative that matches wins.
    // Whenever one operator is a prefix of another, the longer one therefore
    // has to be tried first.
    grammar proto_parser() for str {
        // Optional whitespace; `quiet!` keeps it out of "expected ..." error sets.
        rule _() = quiet!{[' ' | '\t' | '\n' | '\r' ]*}
        rule comma() = "," _
        rule semicolon() = ";" _

        // ---- numeric literals ------------------------------------------------

        // Integer literal. A conditional action is used so that a literal that
        // does not fit in i64 becomes a parse failure instead of a panic.
        rule int() -> MsgHead = a:$(mantissa()) _ {?
            a.parse::<i64>().map(MsgHead::Int).or(Err("integer literal that fits in i64"))
        }
        // Digits before the decimal point: "0", or a number without leading zero.
        rule mantissa() = "0" / (['1'..='9'] ['0'..='9']* )
        // Exponent. It matches plain digits rather than `int()`: `int()` also
        // consumes trailing whitespace, which would end up inside the slice
        // captured by `float` and make the f64 conversion fail; it would also
        // reject exponents written with a leading zero.
        rule expon() = ("e" / "E") ("+" / "-")? ['0'..='9']+
        // Fraction after a mantissa; the digits are optional ("81.").
        rule frac() = "." ['0'..='9']*
        // Fraction without a mantissa; at least one digit is required (".4").
        rule frac1() = "." ['0'..='9']+
        // Float literal. A fraction or an exponent is mandatory, so a bare
        // integer falls through to `int()`; `primary` tries `float()` first.
        rule float() -> MsgHead = a:$(mantissa() (frac() expon()? / expon()) / frac1() expon()?) _ {?
            a.parse::<f64>().map(MsgHead::Float).or(Err("float literal"))
        }

        // ---- other primaries -------------------------------------------------

        // Double-quoted string; there is no escape syntax, so it cannot contain '"'.
        rule string() -> MsgHead = "\"" a:$([^'"']*) "\"" _ { MsgHead::String(a.to_owned()) }
        // Identifier, interned on match.
        rule name(st: &mut SymbolTable) -> SymbolID = a:$(['a'..='z' | 'A'..='Z' | '_'] ['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) _ { st.gen(a) }
        // Quoted symbol: a single quote followed by a name or a binary operator.
        rule symbol(st: &mut SymbolTable) -> MsgHead = "'" a:(name(st) / op:any_binop(st) { op.to_owned() }) { MsgHead::Symbol(a) }
        // Comma-separated names (possibly none) with an optional trailing comma.
        rule name_list(st: &mut SymbolTable) -> Vec<SymbolID> = a:(name(st) ** comma()) comma()? { a }
        // Lambda body as a bracketed statement list.
        rule lambda_body(st: &mut SymbolTable) -> Vec<MsgChain> = "[" _ a:stmts(st) _ "]" _ { a }
        // Lambda body as a single expression introduced by ".".
        rule lambda_expr(st: &mut SymbolTable) -> Vec<MsgChain> = "." _ a:expr(st) { vec![a] }
        // Lambda: a backslash, the parameter names, then one of the two body forms.
        rule lambda(st: &mut SymbolTable) -> MsgChain = "\\" args:name_list(st)? body:(lambda_body(st) / lambda_expr(st) / expected!("lambda body")) {
            let args = args.unwrap_or_default();
            MsgChain{head:MsgHead::Lambda(args, body), chain:vec![]}
        }
        rule tuple(st: &mut SymbolTable) -> Vec<MsgChain> = "(" _ a:(expr(st) ** comma()) comma()? ")" _ { a }
        rule list(st: &mut SymbolTable) -> Vec<MsgChain> = "[" _ a:(expr(st) ** comma()) comma()? "]" _ { a }
        rule elemtype(st: &mut SymbolTable) -> SymbolID = a:$("i32" / "i64" / "f32" / "f64" / "r32" / "r64" / "c32" / "c64") { st.gen(a) }
        // Typed vector "#<elemtype>[...]": the element type symbol becomes the
        // first argument, followed by the elements.
        rule vector(st: &mut SymbolTable) -> Vec<MsgChain> = "#" e:(elemtype(st) / expected!("vector type name")) v:(list(st) / expected!("[..vector elements..]"))
            { let mut v = v; let mut a = vec![MsgChain{head:MsgHead::Symbol(e), chain:vec![]}]; a.append(&mut v); a }

        rule primary(st: &mut SymbolTable) -> MsgChain =
            a:float() { MsgChain::new(a) }
            / a:int() { MsgChain::new(a) }
            / a:string() { MsgChain::new(a) }
            / a:symbol(st) { MsgChain::new(a) }
            / a:lambda(st) { a }
            / a:tuple(st) {
                if a.len() == 1 { // no monotuples: "(x)" is just x.
                    let mut a = a;
                    std::mem::take(&mut a[0])
                } else {
                    MsgChain{head:MsgHead::CurEnvir, chain:vec![Msg{name:st.gen("tuple"), args:a}]}
                }
            }
            / a:list(st) { MsgChain{head:MsgHead::CurEnvir, chain:vec![Msg{name:st.gen("list"), args:a}]} }
            / a:vector(st) { MsgChain{head:MsgHead::CurEnvir, chain:vec![Msg{name:st.gen("vec"), args:a}]} }
            // A bare name is a message sent to the current environment.
            / a:name(st) { MsgChain{head:MsgHead::CurEnvir, chain:vec![Msg{name:a, args:vec![]}]} }

        // Prefix operators are appended as messages to the operand's chain.
        rule unop_expr(st: &mut SymbolTable) -> MsgChain = a:primary(st) { a }
            / op:unop(st) a:(unop_expr(st) / expected!("expression after unary operator")) { let mut a = a; a.add_msg(Msg{name:op, args:vec![]}); a }

        // A receiver followed by any number of postfix messages:
        //   name(args) lambda*   - call with tuple arguments, trailing lambdas appended
        //   name lambda*         - call with only trailing lambdas (possibly none)
        //   [ ... ]              - sent as an "at" message
        pub rule msgchain(st: &mut SymbolTable) -> MsgChain =
            h:unop_expr(st) v:(
                n:name(st) t:tuple(st) f:lambda(st)* { Msg{name:n, args:[t, f].concat()} }
                / n:name(st) f:lambda(st)* { Msg{name:n, args:f} }
                / a:list(st) { { Msg{name:st.gen("at"), args:a} } }
            )* { MsgChain{head:h.head, chain:[h.chain, v].concat()} }

        // ---- operators -------------------------------------------------------

        rule binop_assign(st: &mut SymbolTable) -> SymbolID = a:$("=" / ":=" / "+=" / "+~=" / "-=" / "*=" / "/=" / "%=" / "<<=" / ">>=" / "&=" / "|=" / "^=") _ { st.gen(a) }
        rule binop_or(st: &mut SymbolTable) -> SymbolID = a:$("||") _ { st.gen(a) }
        rule binop_and(st: &mut SymbolTable) -> SymbolID = a:$("&&") _ { st.gen(a) }
        rule binop_equality(st: &mut SymbolTable) -> SymbolID = a:$("!==" / "===" / "!=" / "==") _ { st.gen(a) }
        // "<=>" has to come before "<=", otherwise it can never match.
        rule binop_inequality(st: &mut SymbolTable) -> SymbolID = a:$("<=>" / "<=" / "<" / ">=" / ">") _ { st.gen(a) }
        rule binop_bit_or(st: &mut SymbolTable) -> SymbolID = a:$("|") _ { st.gen(a) }
        rule binop_bit_xor(st: &mut SymbolTable) -> SymbolID = a:$("^") _ { st.gen(a) }
        rule binop_bit_and(st: &mut SymbolTable) -> SymbolID = a:$("&") _ { st.gen(a) }
        rule binop_bit_shift(st: &mut SymbolTable) -> SymbolID = a:$("<<" / ">>") _ { st.gen(a) }
        // NOTE(review): "|" and "^" are accepted here as well, so inside
        // expressions they are consumed at the additive level before
        // `expr_bit_or` / `expr_bit_xor` get to see them - confirm that this is
        // intended.
        rule binop_add(st: &mut SymbolTable) -> SymbolID = a:$("+~" / "+-" / "+" / "-" / "|" / "^") _ { st.gen(a) }
        rule binop_mul(st: &mut SymbolTable) -> SymbolID = a:$("*" / "/" / "%") _ { st.gen(a) }
        rule unop(st: &mut SymbolTable) -> SymbolID = a:("-" {"neg"}/ "!" {"not"}/ "~" {"bitnot"}) _ { st.gen(a) } // unary prefix operators are aliases for symbols.
        // Any binary operator, used for quoted operator symbols. The order
        // matters: equality goes before assignment ("==" vs "=") and shifts go
        // before inequalities ("<<" vs "<"), so that the longest operator wins.
        rule any_binop(st: &mut SymbolTable) -> SymbolID = a:(binop_equality(st) / binop_assign(st) / binop_or(st) / binop_and(st)
            / binop_bit_shift(st) / binop_inequality(st) / binop_bit_or(st) / binop_bit_xor(st) / binop_bit_and(st)
            / binop_add(st) / binop_mul(st)) { a }

        // ---- expressions -----------------------------------------------------
        //
        // Binding strength, loosest first, as given by the rule nesting below:
        //   assignment, ||, &&, inequality, equality, |, ^, &, shift, add, mul.
        // Each binary level turns `a op b op c` into the chain of `a` extended
        // with the messages `op(b)`, `op(c)`.

        // Assignment has a message chain as its target and does not chain.
        pub rule expr(st: &mut SymbolTable) -> MsgChain = z:(
            a:msgchain(st) op:binop_assign(st) b:expr_or(st) { let mut a = a; a.add_msg(Msg{name:op, args:vec![b]}); a }
            / a:expr_or(st) { a }
        ) { z }
        rule expr_or(st: &mut SymbolTable) -> MsgChain =
            a:expr_and(st) m:(op:binop_or(st) b:expr_and(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }
        rule expr_and(st: &mut SymbolTable) -> MsgChain =
            a:expr_ineq(st) m:(op:binop_and(st) b:expr_ineq(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }
        rule expr_ineq(st: &mut SymbolTable) -> MsgChain =
            a:expr_eq(st) m:(op:binop_inequality(st) b:expr_eq(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }
        rule expr_eq(st: &mut SymbolTable) -> MsgChain =
            a:expr_bit_or(st) m:(op:binop_equality(st) b:expr_bit_or(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }
        rule expr_bit_or(st: &mut SymbolTable) -> MsgChain =
            a:expr_bit_xor(st) m:(op:binop_bit_or(st) b:expr_bit_xor(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }
        rule expr_bit_xor(st: &mut SymbolTable) -> MsgChain =
            a:expr_bit_and(st) m:(op:binop_bit_xor(st) b:expr_bit_and(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }
        rule expr_bit_and(st: &mut SymbolTable) -> MsgChain =
            a:expr_bit_shift(st) m:(op:binop_bit_and(st) b:expr_bit_shift(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }
        rule expr_bit_shift(st: &mut SymbolTable) -> MsgChain =
            a:expr_add(st) m:(op:binop_bit_shift(st) b:expr_add(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }
        rule expr_add(st: &mut SymbolTable) -> MsgChain =
            a:expr_mul(st) m:(op:binop_add(st) b:expr_mul(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }
        rule expr_mul(st: &mut SymbolTable) -> MsgChain =
            a:msgchain(st) m:(op:binop_mul(st) b:msgchain(st)
            { Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a }

        // Statements are expressions separated by ";" with an optional trailing ";".
        rule stmts(st: &mut SymbolTable) -> Vec<MsgChain> = a:(expr(st) ** semicolon()) semicolon()? _ { a }
        // Entry point for a whole program; leading whitespace is allowed.
        pub rule program(st: &mut SymbolTable) -> Vec<MsgChain> = _ a:stmts(st) { a }
    }
}
pub fn main() {
let mut symbol_table: SymbolTable = Default::default();
let st = &mut symbol_table;
// test symbol table
println!("s_any {:?}", st.gen("any"));
println!("s_none {:?}", st.gen("none"));
println!("s_any {:?}", st.gen("any"));
println!("s_none {:?}", st.gen("none"));
st.clear();
// simple expressions
println!("proto: {:#?}", proto_parser::expr("x + y", st));
st.clear();
println!("proto: {:#?}", proto_parser::expr("2 atan2(3)", st));
st.clear();
println!("proto: {:#?}", proto_parser::expr("81 + .4", st));
st.clear();
println!("proto: {:#?}", proto_parser::expr("81 + .4 lfsaw", st));
st.clear();
println!("proto: {:#?}", proto_parser::expr("81 + lfsaw", st));
st.clear();
println!("proto: {:#?}", proto_parser::expr("81 + lfsaw * 24", st));
st.clear();
println!("proto: {:#?}", proto_parser::expr("81 * lfsaw + 24", st));
st.clear();
println!(
"proto: {:#?}",
proto_parser::expr("\"this is a string\" reverse", st)
);
st.clear();
println!(
"proto: {:#?}",
proto_parser::expr("a b c", st) //proto_parser::expr("a b c(x) d(y, z) [u] \\x[x*x]")
);
st.clear();
println!("proto: {:#?}", proto_parser::expr("81 + .4 lfsaw * 24", st));
st.clear();
println!(
"proto: {:#?}",
proto_parser::expr("81 + .4 lfsaw * 24 + [8, 7.32]", st)
);
st.clear();
println!(
"proto: {:#?}",
proto_parser::program(
"freq = (81 + .4 lfsaw * 24 + [8, 7.32] lfsaw * 3) nnhz;",
st
)
);
st.clear();
println!(
"proto: {:#?}",
proto_parser::program("bubbles = \\x[x*x];", st)
);
println!("st {:?}", st);
st.clear();
println!(
"proto: {:#?}",
proto_parser::program(r" bubbles = \x[x*x];", st)
);
println!("st {:?}", st);
st.clear();
// a function.
println!(
"proto: {:#?}",
proto_parser::program(
r"
bubbles = \[
freq = (81 + .4 lfsaw * 24 + [8, 7.32] lfsaw * 3) nnhz;
out = .04 * freq fsinosc;
fx = out fadein(.1) comb(.2, 4);
fx jackout
];
",
st
)
);
println!("st {:?}", st);
st.clear();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment