Last active
July 18, 2022 04:06
-
-
Save lfnoise/cd666681203df2cc53548c9659fd9b96 to your computer and use it in GitHub Desktop.
A parser made using rust-peg.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::HashMap;
/// A symbol identified by its textual name.
///
/// NOTE(review): no visible code constructs or reads this type; the parser
/// interns names through `SymbolTable::gen` and passes `SymbolID`s around
/// instead. Confirm whether it is still needed.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Symbol {
    /// The symbol's name.
    name: String,
}
/// Opaque handle for an interned name, handed out by [`SymbolTable::gen`].
///
/// The wrapped value is the number of names the table held when the name was
/// first interned, so ids are only meaningful relative to the table (and the
/// `clear` generation) that produced them.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SymbolID(usize);

/// Interner mapping names to [`SymbolID`]s.
#[derive(Clone, Debug, Default)]
pub struct SymbolTable {
    /// Every name interned since the last `clear`, with the id it was given.
    names: HashMap<String, SymbolID>,
}

impl SymbolTable {
    /// Forgets every interned name.
    ///
    /// Numbering restarts at zero afterwards, so ids obtained before the call
    /// must not be mixed with ids obtained after it.
    pub fn clear(&mut self) {
        self.names.clear();
    }

    /// Returns the id for `s`, interning it first if it has not been seen.
    ///
    /// Repeated calls with the same name return the same id; a new name gets
    /// the next unused index.
    pub fn gen(&mut self, s: &str) -> SymbolID {
        // One lookup on the hit path, and no panicking `names[s]` index.
        if let Some(&id) = self.names.get(s) {
            return id;
        }
        let id = SymbolID(self.names.len());
        self.names.insert(s.to_owned(), id);
        id
    }
}
#[derive(Debug, Clone)] | |
pub enum MsgHead { | |
CurEnvir, | |
Int(i64), | |
Float(f64), | |
String(String), | |
Symbol(SymbolID), | |
Lambda(Vec<SymbolID>, Vec<MsgChain>), | |
} | |
#[derive(Debug, Clone)] | |
pub struct Msg { | |
name: SymbolID, | |
args: Vec<MsgChain>, | |
} | |
#[derive(Debug, Clone)] | |
pub struct MsgChain { | |
head: MsgHead, | |
chain: Vec<Msg>, | |
} | |
impl Default for MsgChain { | |
fn default() -> Self { | |
MsgChain { | |
head: MsgHead::CurEnvir, | |
chain: vec![], | |
} | |
} | |
} | |
impl MsgChain { | |
fn new(head: MsgHead) -> Self { | |
Self { | |
head, | |
chain: vec![], | |
} | |
} | |
fn add_msg(&mut self, m: Msg) { | |
self.chain.push(m); | |
} | |
fn add_msgs(&mut self, m: &mut Vec<Msg>) { | |
self.chain.append(m); | |
} | |
} | |
peg::parser! { | |
grammar proto_parser() for str { | |
rule _() = quiet!{[' ' | '\t' | '\n' | '\r' ]*} | |
rule comma() = "," _ | |
rule semicolon() = ";" _ | |
rule int() -> MsgHead = a:$mantissa() _ { MsgHead::Int(a.parse().unwrap()) } | |
rule mantissa() = "0" / (['1'..='9'] ['0'..='9']* ) | |
rule expon() = ("e" / "E") ("+" / "-")? int() | |
rule frac() = "." ['0'..='9']* | |
rule frac1() = "." ['0'..='9']+ | |
rule float() -> MsgHead = a:$((mantissa() frac()? / frac1()) expon()?) _ { MsgHead::Float(a.parse().unwrap()) } | |
rule string() -> MsgHead = "\"" a:$([^'"']*) "\"" _ { MsgHead::String(a.to_owned()) } | |
rule name(st: &mut SymbolTable) -> SymbolID = a:$(['a'..='z' | 'A'..='Z' | '_'] ['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) _ { st.gen(a) } | |
rule symbol(st: &mut SymbolTable) -> MsgHead = "'" a:(name(st) / op:any_binop(st) { op.to_owned() }) { MsgHead::Symbol(a) } | |
rule name_list(st: &mut SymbolTable) -> Vec<SymbolID> = a:(name(st) ** comma()) comma()? { a } | |
rule lambda_body(st: &mut SymbolTable) -> Vec<MsgChain> = "[" _ a:stmts(st) _ "]" _ { a } | |
rule lambda_expr(st: &mut SymbolTable) -> Vec<MsgChain> = "." _ a:expr(st) { vec![a] } | |
rule lambda(st: &mut SymbolTable) -> MsgChain = "\\" args:name_list(st)? body:(lambda_body(st) / lambda_expr(st) / expected!("lambda body")) { | |
let args = args.unwrap_or_default(); | |
MsgChain{head:MsgHead::Lambda(args, body), chain:vec![]} | |
} | |
rule tuple(st: &mut SymbolTable) -> Vec<MsgChain> = "(" _ a:(expr(st) ** comma()) comma()? ")" _ { a } | |
rule list(st: &mut SymbolTable) -> Vec<MsgChain> = "[" _ a:(expr(st) ** comma()) comma()? "]" _ { a } | |
rule elemtype(st: &mut SymbolTable) -> SymbolID = a:$("i32" / "i64" / "f32" / "f64" / "r32" / "r64" / "c32" / "c64") { st.gen(a) } | |
rule vector(st: &mut SymbolTable) -> Vec<MsgChain> = "#" e:(elemtype(st) / expected!("vector type name")) v:(list(st) / expected!("[..vector elements..]")) | |
{ let mut v = v; let mut a = vec![MsgChain{head:MsgHead::Symbol(e), chain:vec![]}]; a.append(&mut v); a } | |
rule primary(st: &mut SymbolTable) -> MsgChain = | |
a:float() { MsgChain::new(a) } | |
/ a:int() { MsgChain::new(a) } | |
/ a:string() { MsgChain::new(a) } | |
/ a:symbol(st) { MsgChain::new(a) } | |
/ a:lambda(st) { a } | |
/ a:tuple(st) { | |
if a.len() == 1 { // no monotuples. | |
let mut a = a; | |
std::mem::take(&mut a[0]) | |
} else { | |
MsgChain{head:MsgHead::CurEnvir, chain:vec![Msg{name:st.gen("tuple"), args:a}]} | |
} | |
} | |
/ a:list(st) { MsgChain{head:MsgHead::CurEnvir, chain:vec![Msg{name:st.gen("list"), args:a}]} } | |
/ a:vector(st) { MsgChain{head:MsgHead::CurEnvir, chain:vec![Msg{name:st.gen("vec"), args:a}]} } | |
/ a:name(st) { MsgChain{head:MsgHead::CurEnvir, chain:vec![Msg{name:a, args:vec![]}]} } | |
rule unop_expr(st: &mut SymbolTable) -> MsgChain = a:primary(st) { a } | |
/ op:unop(st) a:(unop_expr(st) / expected!("expression after unary operator")) { let mut a = a; a.add_msg(Msg{name:op, args:vec![]}); a } | |
pub rule msgchain(st: &mut SymbolTable) -> MsgChain = | |
h:unop_expr(st) v:( | |
n:name(st) t:tuple(st) f:lambda(st)* { Msg{name:n, args:[t, f].concat()} } | |
/ n:name(st) f:lambda(st)* { Msg{name:n, args:f} } | |
/ a:list(st) { { Msg{name:st.gen("at"), args:a} } } | |
)* { MsgChain{head:h.head, chain:[h.chain, v].concat()} } | |
rule binop_assign(st: &mut SymbolTable) -> SymbolID = a:$("=" / ":=" / "+=" / "+~=" / "-=" / "*=" / "/=" / "%=" / "<<=" / ">>=" / "&=" / "|=" / "^=") _ { st.gen(a) } | |
rule binop_or(st: &mut SymbolTable) -> SymbolID = a:$("||") _ { st.gen(a) } | |
rule binop_and(st: &mut SymbolTable) -> SymbolID = a:$("&&") _ { st.gen(a) } | |
rule binop_equality(st: &mut SymbolTable) -> SymbolID = a:$("!==" / "===" / "!=" / "==") _ { st.gen(a) } | |
rule binop_inequality(st: &mut SymbolTable) -> SymbolID = a:$("<=" / "<" / ">=" / ">" / "<=>") _ { st.gen(a) } | |
rule binop_bit_or(st: &mut SymbolTable) -> SymbolID = a:$("|") _ { st.gen(a) } | |
rule binop_bit_xor(st: &mut SymbolTable) -> SymbolID = a:$("^") _ { st.gen(a) } | |
rule binop_bit_and(st: &mut SymbolTable) -> SymbolID = a:$("&") _ { st.gen(a) } | |
rule binop_bit_shift(st: &mut SymbolTable) -> SymbolID = a:$("<<" / ">>") _ { st.gen(a) } | |
rule binop_add(st: &mut SymbolTable) -> SymbolID = a:$("+~" / "+-" / "+" / "-" / "|" / "^") _ { st.gen(a) } | |
rule binop_mul(st: &mut SymbolTable) -> SymbolID = a:$("*" / "/" / "%") _ { st.gen(a) } | |
rule unop(st: &mut SymbolTable) -> SymbolID = a:("-" {"neg"}/ "!" {"not"}/ "~" {"bitnot"}) _ { st.gen(a) } // unary prefix operators are aliases for symbols. | |
rule any_binop(st: &mut SymbolTable) -> SymbolID = a:(binop_assign(st) / binop_or(st) / binop_and(st) / binop_equality(st) | |
/ binop_inequality(st) / binop_bit_or(st) / binop_bit_xor(st) / binop_bit_and(st) | |
/ binop_bit_shift(st) / binop_add(st) / binop_mul(st)) { a } | |
// | |
pub rule expr(st: &mut SymbolTable) -> MsgChain = z:( | |
a:msgchain(st) op:binop_assign(st) b:expr_or(st) { let mut a = a; a.add_msg(Msg{name:op, args:vec![b]}); a } | |
/ a:expr_or(st) { a } | |
) { z } | |
rule expr_or(st: &mut SymbolTable) -> MsgChain = | |
a:expr_and(st) m:(op:binop_or(st) b:expr_and(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule expr_and(st: &mut SymbolTable) -> MsgChain = | |
a:expr_ineq(st) m:(op:binop_and(st) b:expr_ineq(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule expr_ineq(st: &mut SymbolTable) -> MsgChain = | |
a:expr_eq(st) m:(op:binop_inequality(st) b:expr_eq(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule expr_eq(st: &mut SymbolTable) -> MsgChain = | |
a:expr_bit_or(st) m:(op:binop_equality(st) b:expr_bit_or(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule expr_bit_or(st: &mut SymbolTable) -> MsgChain = | |
a:expr_bit_xor(st) m:(op:binop_bit_or(st) b:expr_bit_xor(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule expr_bit_xor(st: &mut SymbolTable) -> MsgChain = | |
a:expr_bit_and(st) m:(op:binop_bit_xor(st) b:expr_bit_and(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule expr_bit_and(st: &mut SymbolTable) -> MsgChain = | |
a:expr_bit_shift(st) m:(op:binop_bit_and(st) b:expr_bit_shift(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule expr_bit_shift(st: &mut SymbolTable) -> MsgChain = | |
a:expr_add(st) m:(op:binop_bit_shift(st) b:expr_add(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule expr_add(st: &mut SymbolTable) -> MsgChain = | |
a:expr_mul(st) m:(op:binop_add(st) b:expr_mul(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule expr_mul(st: &mut SymbolTable) -> MsgChain = | |
a:msgchain(st) m:(op:binop_mul(st) b:msgchain(st) | |
{ Msg{name:op, args:vec![b]}})* { let (mut a, mut m) = (a,m); a.add_msgs(&mut m); a } | |
rule stmts(st: &mut SymbolTable) -> Vec<MsgChain> = a:(expr(st) ** semicolon()) semicolon()? _ { a } | |
pub rule program(st: &mut SymbolTable) -> Vec<MsgChain> = _ a:stmts(st) { a } | |
} | |
} | |
pub fn main() { | |
let mut symbol_table: SymbolTable = Default::default(); | |
let st = &mut symbol_table; | |
// test symbol table | |
println!("s_any {:?}", st.gen("any")); | |
println!("s_none {:?}", st.gen("none")); | |
println!("s_any {:?}", st.gen("any")); | |
println!("s_none {:?}", st.gen("none")); | |
st.clear(); | |
// simple expressions | |
println!("proto: {:#?}", proto_parser::expr("x + y", st)); | |
st.clear(); | |
println!("proto: {:#?}", proto_parser::expr("2 atan2(3)", st)); | |
st.clear(); | |
println!("proto: {:#?}", proto_parser::expr("81 + .4", st)); | |
st.clear(); | |
println!("proto: {:#?}", proto_parser::expr("81 + .4 lfsaw", st)); | |
st.clear(); | |
println!("proto: {:#?}", proto_parser::expr("81 + lfsaw", st)); | |
st.clear(); | |
println!("proto: {:#?}", proto_parser::expr("81 + lfsaw * 24", st)); | |
st.clear(); | |
println!("proto: {:#?}", proto_parser::expr("81 * lfsaw + 24", st)); | |
st.clear(); | |
println!( | |
"proto: {:#?}", | |
proto_parser::expr("\"this is a string\" reverse", st) | |
); | |
st.clear(); | |
println!( | |
"proto: {:#?}", | |
proto_parser::expr("a b c", st) //proto_parser::expr("a b c(x) d(y, z) [u] \\x[x*x]") | |
); | |
st.clear(); | |
println!("proto: {:#?}", proto_parser::expr("81 + .4 lfsaw * 24", st)); | |
st.clear(); | |
println!( | |
"proto: {:#?}", | |
proto_parser::expr("81 + .4 lfsaw * 24 + [8, 7.32]", st) | |
); | |
st.clear(); | |
println!( | |
"proto: {:#?}", | |
proto_parser::program( | |
"freq = (81 + .4 lfsaw * 24 + [8, 7.32] lfsaw * 3) nnhz;", | |
st | |
) | |
); | |
st.clear(); | |
println!( | |
"proto: {:#?}", | |
proto_parser::program("bubbles = \\x[x*x];", st) | |
); | |
println!("st {:?}", st); | |
st.clear(); | |
println!( | |
"proto: {:#?}", | |
proto_parser::program(r" bubbles = \x[x*x];", st) | |
); | |
println!("st {:?}", st); | |
st.clear(); | |
// a function. | |
println!( | |
"proto: {:#?}", | |
proto_parser::program( | |
r" | |
bubbles = \[ | |
freq = (81 + .4 lfsaw * 24 + [8, 7.32] lfsaw * 3) nnhz; | |
out = .04 * freq fsinosc; | |
fx = out fadein(.1) comb(.2, 4); | |
fx jackout | |
]; | |
", | |
st | |
) | |
); | |
println!("st {:?}", st); | |
st.clear(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment