Skip to content

Instantly share code, notes, and snippets.

@dfhoughton
Last active October 28, 2018 18:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dfhoughton/6188e187149da9dc70a0e09cd02f6667 to your computer and use it in GitHub Desktop.
Save dfhoughton/6188e187149da9dc70a0e09cd02f6667 to your computer and use it in GitHub Desktop.
proof of concept grammar macro
// Proof-of-concept macro for converting a declarative expression such as
// grammar!{
// (?bBw)
// larry => <bob>{5} | <bar>{3,}
// larry -> ("this")? <bar>* ("that") <bob>+
// bob => r("foo")
// bar => (?i) [&v]
// };
// into a pidgin::Grammar. As written here the expansion evaluates to a
// PseudoGrammar (see the @initialized arm), which only records what was parsed.
//
// How it works: the public arms at the bottom pick up the optional leading
// (?flags) group and delegate to @initialized, which creates the shared state
// and hands the remaining tokens to @rules. Each @rules arm consumes one
// construct from the front of the token stream and recurses on the rest.
// The internal arms pass the same two expressions along:
//   $l -- Vec<String> of rule names, pushed the first time a name is seen
//   $m -- HashMap from rule name to its alternates; each alternate is a
//         (MacroFlags, Vec<Part>) pair
//
// Repetition suffixes accepted after <rule>, (string) and [leaves] are stored
// in the Part as (low, high, stingy):
//   none   (None, None)           ?      (None, Some(1))
//   *      (Some(0), None)        +      (Some(1), None)
//   {n}    (Some(n), Some(n))     {n,}   (Some(n), None)
//   {n,m}  (Some(n), Some(m))
// A ? directly after ?, *, +, {n,} or {n,m} sets stingy to true; there is no
// dedicated arm for {n}?.
// Arms are tried in order, so for each construct the longer suffix forms are
// listed before the shorter ones and the suffix-less form comes last.
//
// this runs (see fn main)
macro_rules! grammar {
// common state has been set, proceed to recursively nibbling away bits of the grammar
// $mflags is the MacroFlags value prepared by one of the entry arms at the bottom
( @initialized $mflags:expr, $($rest:tt)+ ) => ({
// l: rule names; m: rule name -> alternates
let mut l: Vec<String> = Vec::new();
let mut m: std::collections::HashMap<String,Vec<(MacroFlags,Vec<Part>)>> = std::collections::HashMap::new();
// NOTE(review): the internal arms expand to bare statements, so the whole
// recursive expansion appears to land in this one block, including the `let`
// bindings that borrow m -- keep that in mind before m is moved below
grammar!(@rules l, m, $($rest)+ );
PseudoGrammar::new(l, m, $mflags)
});
// the general way to start a new rule
// registers $name in $l if $m has no entry for it yet, then appends a new
// alternate (flags $mf, no parts so far) to that rule's entry in $m
(@start_rule $l:expr, $m:expr, $name:ident, $mf:expr, $($rest:tt)+) => (
let name = stringify!($name).to_string();
if !$m.contains_key(&name) {
$l.push(name.clone());
}
let alternates = $m.entry(name).or_insert_with(|| Vec::new());
let v: Vec<Part>= Vec::new();
alternates.push(($mf, v));
grammar!(@rules $l, $m, $($rest)+)
);
// extract rule name and any flags particular to that rule
// the flag group may be (?on-off), (?on) or (?-off), or absent; the rule's
// flags are built from MacroFlags defaults plus that group
// => rules don't mess with space
(@rules $l:expr, $m:expr, $name:ident => (?$on:ident-$off:ident) $($rest:tt)+) => (
let new_flags = MacroFlags::from_strings(stringify!($on), stringify!($off));
grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
);
(@rules $l:expr, $m:expr, $name:ident => (?$on:ident) $($rest:tt)+) => (
let new_flags = MacroFlags::from_strings(stringify!($on), "");
grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
);
(@rules $l:expr, $m:expr, $name:ident => (?-$off:ident) $($rest:tt)+) => (
let new_flags = MacroFlags::from_strings("", stringify!($off));
grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
);
(@rules $l:expr, $m:expr, $name:ident => $($rest:tt)+) => (
let new_flags = MacroFlags::defaults();
grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
);
// -> allow optional space between tokens
// same four flag variants as =>, but add_space is switched on in the rule's flags
(@rules $l:expr, $m:expr, $name:ident -> (?$on:ident-$off:ident) $($rest:tt)+) => (
let mut new_flags = MacroFlags::from_strings(stringify!($on), stringify!($off));
new_flags.add_space = true;
grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
);
(@rules $l:expr, $m:expr, $name:ident -> (?$on:ident) $($rest:tt)+) => (
let mut new_flags = MacroFlags::from_strings(stringify!($on), "");
new_flags.add_space = true;
grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
);
(@rules $l:expr, $m:expr, $name:ident -> (?-$off:ident) $($rest:tt)+) => (
let mut new_flags = MacroFlags::from_strings("", stringify!($off));
new_flags.add_space = true;
grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
);
(@rules $l:expr, $m:expr, $name:ident -> $($rest:tt)+) => (
let mut new_flags = MacroFlags::defaults();
new_flags.add_space = true;
grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
);
// general method for adding a rule part
// $p is pushed onto the parts of the last alternate of the rule named by $l.last()
// NOTE(review): $l.last() is the most recently *registered* name; @start_rule
// does not push a name that $m already contains, so when a rule is reopened
// after other rules were introduced this looks like it targets the wrong
// rule -- confirm
( @add_part $l:expr, $m:expr, $p:expr, $($parts:tt)* ) => (
let v = &mut $m.get_mut($l.last().unwrap()).unwrap().last_mut().unwrap().1;
v.push($p);
grammar!(@rules $l, $m, $($parts)*)
);
// general rule for adding a <rule>
// the referenced rule is stored by name as Part::G
// repetition suffix is optional
( @add_grammar $l:expr, $m:expr, $e:ident, $low:expr, $high:expr, $stingy:expr, $($parts:tt)* ) => (
grammar!(
@add_part
$l,
$m,
Part::G(stringify!($e).to_string(), $low, $high, $stingy),
$($parts)*
)
);
// <rule>?? and <rule>?
( @rules $l:expr, $m:expr, <$e:ident>?? $($parts:tt)* ) => (
grammar!(@add_grammar $l, $m, $e, None, Some(1), true, $($parts)*)
);
( @rules $l:expr, $m:expr, <$e:ident>? $($parts:tt)* ) => (
grammar!(@add_grammar $l, $m, $e, None, Some(1), false, $($parts)*)
);
// <rule>*? and <rule>*
( @rules $l:expr, $m:expr, <$e:ident>*? $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, Some(0), None, true, $($parts)*)
);
( @rules $l:expr, $m:expr, <$e:ident>* $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, Some(0), None, false, $($parts)*)
);
// <rule>+? and <rule>+
( @rules $l:expr, $m:expr, <$e:ident>+? $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, Some(1), None, true, $($parts)*)
);
( @rules $l:expr, $m:expr, <$e:ident>+ $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, Some(1), None, false, $($parts)*)
);
// <rule>{low,high}? and <rule>{low,high}
( @rules $l:expr, $m:expr, <$e:ident>{$low:expr,$high:expr}? $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, Some($low), Some($high), true, $($parts)*)
);
( @rules $l:expr, $m:expr, <$e:ident>{$low:expr,$high:expr} $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, Some($low), Some($high), false, $($parts)*)
);
// <rule>{low,}? and <rule>{low,}
( @rules $l:expr, $m:expr, <$e:ident>{$low:expr,}? $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, Some($low), None, true, $($parts)*)
);
( @rules $l:expr, $m:expr, <$e:ident>{$low:expr,} $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, Some($low), None, false, $($parts)*)
);
// <rule>{n}: $n is used for both bounds
( @rules $l:expr, $m:expr, <$e:ident>{$n:expr} $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, Some($n), Some($n), false, $($parts)*)
);
// bare <rule>
( @rules $l:expr, $m:expr, <$e:ident> $($parts:tt)*) => (
grammar!(@add_grammar $l, $m, $e, None, None, false, $($parts)*)
);
// general rule for adding a (string)
// (string) introduces a single leaf, stored via to_string() as Part::S; repetition suffix is optional
( @add_string $l:expr, $m:expr, $e:expr, $low:expr, $high:expr, $stingy:expr, $($parts:tt)* ) => (
grammar!(
@add_part
$l,
$m,
Part::S($e.to_string(), $low, $high, $stingy),
$($parts)*
)
);
// (string)?? and (string)?
( @rules $l:expr, $m:expr, ($e:expr)?? $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, None, Some(1), true, $($parts)*)
);
( @rules $l:expr, $m:expr, ($e:expr)? $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, None, Some(1), false, $($parts)*)
);
// (string)*? and (string)*
( @rules $l:expr, $m:expr, ($e:expr)*? $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, Some(0), None, true, $($parts)*)
);
( @rules $l:expr, $m:expr, ($e:expr)* $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, Some(0), None, false, $($parts)*)
);
// (string)+? and (string)+
( @rules $l:expr, $m:expr, ($e:expr)+? $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, Some(1), None, true, $($parts)*)
);
( @rules $l:expr, $m:expr, ($e:expr)+ $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, Some(1), None, false, $($parts)*)
);
// (string){low,high}? and (string){low,high}
( @rules $l:expr, $m:expr, ($e:expr){$low:expr,$high:expr}? $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, Some($low), Some($high), true, $($parts)*)
);
( @rules $l:expr, $m:expr, ($e:expr){$low:expr,$high:expr} $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, Some($low), Some($high), false, $($parts)*)
);
// (string){low,}? and (string){low,}
( @rules $l:expr, $m:expr, ($e:expr){$low:expr,}? $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, Some($low), None, true, $($parts)*)
);
( @rules $l:expr, $m:expr, ($e:expr){$low:expr,} $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, Some($low), None, false, $($parts)*)
);
// (string){n}: $n is used for both bounds
( @rules $l:expr, $m:expr, ($e:expr){$n:expr} $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, Some($n), Some($n), false, $($parts)*)
);
// bare (string)
( @rules $l:expr, $m:expr, ($e:expr) $($parts:tt)*) => (
grammar!(@add_string $l, $m, $e, None, None, false, $($parts)*)
);
// general rule for adding [leaves]
// [leaves] ingest a list of leaves; repetition suffix is optional
// the expression inside the brackets must be usable as a &[&str]; each entry
// is copied into an owned String inside Part::V
( @add_vec $l:expr, $m:expr, $e:expr, $low:expr, $high:expr, $stingy:expr, $($parts:tt)* ) => (
let terms: &[&str] = $e;
grammar!(
@add_part
$l,
$m,
Part::V(terms.iter().map(|s| s.to_string()).collect(), $low, $high, $stingy),
$($parts)*
)
);
// [leaves]?? and [leaves]?
( @rules $l:expr, $m:expr, [$e:expr]?? $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, None, Some(1), true, $($parts)*)
);
( @rules $l:expr, $m:expr, [$e:expr]? $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, None, Some(1), false, $($parts)*)
);
// [leaves]*? and [leaves]*
( @rules $l:expr, $m:expr, [$e:expr]*? $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, Some(0), None, true, $($parts)*)
);
( @rules $l:expr, $m:expr, [$e:expr]* $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, Some(0), None, false, $($parts)*)
);
// [leaves]+? and [leaves]+
( @rules $l:expr, $m:expr, [$e:expr]+? $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, Some(1), None, true, $($parts)*)
);
( @rules $l:expr, $m:expr, [$e:expr]+ $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, Some(1), None, false, $($parts)*)
);
// [leaves]{low,high}? and [leaves]{low,high}
( @rules $l:expr, $m:expr, [$e:expr]{$low:expr,$high:expr}? $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, Some($low), Some($high), true, $($parts)*)
);
( @rules $l:expr, $m:expr, [$e:expr]{$low:expr,$high:expr} $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, Some($low), Some($high), false, $($parts)*)
);
// [leaves]{low,}? and [leaves]{low,}
( @rules $l:expr, $m:expr, [$e:expr]{$low:expr,}? $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, Some($low), None, true, $($parts)*)
);
( @rules $l:expr, $m:expr, [$e:expr]{$low:expr,} $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, Some($low), None, false, $($parts)*)
);
// [leaves]{n}: $n is used for both bounds
( @rules $l:expr, $m:expr, [$e:expr]{$n:expr} $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, Some($n), Some($n), false, $($parts)*)
);
// bare [leaves]
( @rules $l:expr, $m:expr, [$e:expr] $($parts:tt)*) => (
grammar!(@add_vec $l, $m, $e, None, None, false, $($parts)*)
);
// r(pattern) allows the introduction of an externally generated regex; no repetition suffix
// the pattern is stored via to_string() as Part::R
( @rules $l:expr, $m:expr, r($e:expr) $($parts:tt)*) => (
grammar!(@add_part $l, $m, Part::R($e.to_string()), $($parts)*)
);
// | provides an alternate way to define an alternate (other than repeating a rule)
// the new alternate starts with no parts and a clone of the flags of the
// alternate before it; like @add_part it locates the rule through $l.last()
( @rules $l:expr, $m:expr, | $($parts:tt)* ) => (
let flags = $m.get($l.last().unwrap()).unwrap().last().unwrap().0.clone();
$m.get_mut($l.last().unwrap()).unwrap().push((flags, Vec::new()));
grammar!(@rules $l, $m, $($parts)*)
);
// basement rule that should never consume any token trees
// if we hit this, either the macro rules don't cover all cases or the
// conventions of the macro rules have not been followed
// with no tokens left the repetition below expands to nothing, which is how
// the recursion ends; otherwise one panic! is emitted per leftover token tree
( @rules $l:expr, $m:expr, $($parts:tt)*) => (
$(
panic!("unused token tree: {}", stringify!($parts))
)*
);
// rules that match initially -- they define any flags common to all rules
// the leading group may be (?on), (?-off) or (?on-off); it is applied on top
// of MacroFlags::defaults() and the result is handed to @initialized
( (?$on:ident) $($rest:tt)+ ) => ({
let mut mflags = MacroFlags::defaults();
mflags.set(stringify!($on), "");
grammar!(@initialized mflags, $($rest)+ )
});
( (?-$off:ident) $($rest:tt)+ ) => ({
let mut mflags = MacroFlags::defaults();
mflags.set("", stringify!($off));
grammar!(@initialized mflags, $($rest)+ )
});
( (?$on:ident-$off:ident) $($rest:tt)+ ) => ({
let mut mflags = MacroFlags::defaults();
mflags.set(stringify!($on), stringify!($off));
grammar!(@initialized mflags, $($rest)+ )
});
// no leading flag group: plain defaults
( $($rest:tt)+ ) => ({
let mflags = MacroFlags::defaults();
grammar!(@initialized mflags, $($rest)+ )
});
}
// Demonstration entry point: expands grammar! twice and prints what each
// expansion collected.
fn main() {
    // leaves consumed by the [&leaves] part of the `bar` rule below
    let leaves = vec!["baz", "plugh"];

    // global flags, both arrow kinds, alternation, and all four part kinds
    let full = grammar!{
        (?bBw)
        larry => <bob>{5} | <bar>{3,}
        larry -> ("this")? <bar>* ("that") <bob>+
        bob => r("foo")
        bar => (?i) [&leaves]
    };
    full.display();

    // smallest possible grammar: one rule holding one string leaf, no flags
    let minimal = grammar!{
        foo => ("bar")
    };
    minimal.display();
}
/// Stand-in for a real grammar: it only stores what `grammar!` collected so
/// that the result of an expansion can be inspected.
#[doc(hidden)]
pub struct PseudoGrammar {
    /// Rule names, in the order in which `display` lists them.
    l: Vec<String>,
    /// Alternates of each rule, keyed by rule name; every alternate pairs its
    /// own flags with its sequence of parts.
    m: std::collections::HashMap<String,Vec<(MacroFlags,Vec<Part>)>>,
    /// Flags supplied at construction; `display` prints them as a header.
    macro_flags: MacroFlags,
}
impl PseudoGrammar {
    /// Bundles the rule-name list, the rule table and the flags without any
    /// validation or transformation.
    pub fn new(l: Vec<String>, m: std::collections::HashMap<String,Vec<(MacroFlags,Vec<Part>)>>, macro_flags: MacroFlags) -> PseudoGrammar {
        PseudoGrammar{l, m, macro_flags}
    }
    /// Prints a blank line, the flags, and then one `name := alternates` line
    /// per entry of `l` (alternates in `Debug` form) to standard output.
    ///
    /// # Panics
    ///
    /// Panics if a name in `l` has no entry in `m`.
    pub fn display(&self) {
        println!("\n{}", self.macro_flags);
        // borrow the names; cloning the whole list just to iterate it is wasted work
        for rule in &self.l {
            let alternates = self
                .m
                .get(rule)
                .expect("every rule name in `l` must have an entry in `m`");
            println!("{} := {:?}", rule, alternates);
        }
    }
}
// One element of a rule alternate, as recorded by grammar!.
// For V, G and S the trailing fields are (low, high, stingy): the optional
// lower and upper repetition bounds and the flag set by a trailing ? suffix.
#[derive(Debug,Clone)]
#[doc(hidden)]
pub enum Part {
// r(pattern): the pattern expression converted with to_string(); carries no repetition
R(String),
// [leaves]: every leaf of the supplied slice, copied into an owned String
V(Vec<String>,Option<usize>,Option<usize>,bool),
// <rule>: the name of the referenced rule
G(String,Option<usize>,Option<usize>,bool),
// (string): a single leaf
S(String,Option<usize>,Option<usize>,bool),
}
/// The set of single-letter flags that `grammar!` understands, plus the
/// `add_space` marker that the macro sets directly.
#[derive(Clone)]
#[doc(hidden)]
pub struct MacroFlags {
    case_insensitive: bool, // i
    multi_line: bool,       // m
    unicode: bool,          // u
    reverse_greed: bool,    // U
    dot_all: bool,          // s
    whitespace_some: bool,  // w
    whitespace_maybe: bool, // W
    word_left: bool,        // b
    word_right: bool,       // B
    /// Has no flag letter; written directly by whoever owns the value.
    pub add_space: bool,
}
impl MacroFlags {
    /// Returns the baseline flag set: `unicode` is on, everything else is off.
    pub fn defaults() -> MacroFlags {
        MacroFlags {
            case_insensitive: false,
            multi_line: false,
            unicode: true,
            reverse_greed: false,
            dot_all: false,
            whitespace_some: false,
            whitespace_maybe: false,
            word_left: false,
            word_right: false,
            add_space: false,
        }
    }
    /// Builds a flag set by applying `on` and `off` to [`MacroFlags::defaults`].
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`MacroFlags::set`].
    pub fn from_strings(on: &str, off: &str) -> MacroFlags {
        let mut mf = MacroFlags::defaults();
        mf.set(on, off);
        mf
    }
    /// Switches on every flag letter in `on`, then switches off every flag
    /// letter in `off`; a letter that appears in both therefore ends up off.
    ///
    /// `w` and `W` are mutually exclusive: switching one on switches the other
    /// off. Switching one off leaves the other untouched.
    ///
    /// # Panics
    ///
    /// Panics with `unfamiliar flag: <c>` on any character other than
    /// `i`, `m`, `u`, `U`, `s`, `w`, `W`, `b` or `B`.
    pub fn set(&mut self, on: &str, off: &str) {
        // a fixed two-element array is enough here; no heap allocation needed
        for &(letters, enabled) in [(on, true), (off, false)].iter() {
            for c in letters.chars() {
                match c {
                    'i' => self.case_insensitive = enabled,
                    'm' => self.multi_line = enabled,
                    'u' => self.unicode = enabled,
                    'U' => self.reverse_greed = enabled,
                    's' => self.dot_all = enabled,
                    'w' => {
                        self.whitespace_some = enabled;
                        if enabled {
                            self.whitespace_maybe = false;
                        }
                    }
                    'W' => {
                        self.whitespace_maybe = enabled;
                        if enabled {
                            self.whitespace_some = false;
                        }
                    }
                    'b' => self.word_left = enabled,
                    'B' => self.word_right = enabled,
                    _ => panic!("unfamiliar flag: {}", c),
                }
            }
        }
    }
}
// Renders the flags as a regex-style group, e.g. (?bBw).
impl std::fmt::Display for MacroFlags {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // (is this symbol shown?, symbol), in output order; unicode is on by
        // default, so only its absence is shown, as "-u"
        let table = [
            (self.case_insensitive, "i"),
            (self.multi_line, "m"),
            (self.dot_all, "s"),
            (self.reverse_greed, "U"),
            (self.word_left, "b"),
            (self.word_right, "B"),
            (self.whitespace_some, "w"),
            (self.whitespace_maybe, "W"),
            (self.add_space, "a"),
            (!self.unicode, "-u"),
        ];
        let mut rendered = String::new();
        for &(shown, symbol) in table.iter() {
            if shown {
                rendered.push_str(symbol);
            }
        }
        write!(f, "(?{})", rendered)
    }
}
// Debug output is deliberately the same compact text as Display.
impl std::fmt::Debug for MacroFlags {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        std::fmt::Display::fmt(self, f)
    }
}
@dfhoughton
Copy link
Author

dfhoughton commented Oct 28, 2018

This code runs without problems. Its output is

(?bBw)
larry := [((?), [G("bob", Some(5), Some(5), false)]), ((?), [G("bar", Some(3), None, false)]), ((?a), [S("this", None, Some(1), false), G("bar", Some(0), None, false), S("that", None, None, false), G("bob", Some(1), None, false)])]
bob := [((?), [R("foo")])]
bar := [((?i), [V(["baz", "plugh"], None, None, false)])]

(?)
foo := [((?), [S("bar", None, None, false)])]

When I copy it into the pidgin project in src/macros.rs, export it in src/lib.rs via

#[macro_use]
pub mod macros;

And then invoke it in a test in tests/macros.rs like so

#![feature(test)]
#[macro_use]
extern crate pidgin;

#[test]
fn foo() {
    grammar!{
        foo => ("bar")
    };
}

I get a "cannot borrow m as mutable more than once at a time" error during compilation. Here's a detailed description of the error:

error[E0505]: cannot move out of `m` because it is borrowed                                                                                                                                                                                   
   --> src/macros.rs:11:47                                                                                                                                                                                                                    
    |                                                                                                                                                                                                                                         
5   |   macro_rules! grammar {                                                                                                                                                                                                                
    |  _-                                                                                                                                                                                                                                     
    | |_|                                                                                                                                                                                                                                     
    | |                                                                                                                                                                                                                                       
6   | |     // common state has been set, proceed to recursively nibbling away bits of the grammar                                                                                                                                            
7   | |     ( @initialized $mflags:expr, $($rest:tt)+ ) => ({                                                                                                                                                                                 
8   | |         let mut l: Vec<String> = Vec::new();                                                                                                                                                                                          
9   | |         let mut m: std::collections::HashMap<String,Vec<($crate::macros::MacroFlags,Vec<$crate::macros::Part>)>> = std::collections::HashMap::new();                                                                                  
10  | |         grammar!(@rules l, m, $($rest)+ );                                                                                                                                                                                            
    | |                            - borrow of `m` occurs here                                                                                                                                                                                
11  | |         $crate::macros::PseudoGrammar::new(l, m, $mflags)                                                                                                                                                                             
    | |                                               ^ move out of `m` occurs here                                                                                                                                                           
...   |                                                                                                                                                                                                                                       
260 | |         grammar!(@initialized mflags, $($rest)+ )                                                                                                                                                                                     
    | |         ----------------------------------------- in this macro invocation                                                                                                                                                            
261 | |     });                                                                                                                                                                                                                               
262 | | }                                                                                                                                                                                                                                     
    | | -                                                                                                                                                                                                                                     
    | |_|                                                                                                                                                                                                                                     
    | |_in this expansion of `grammar!`                                                                                                                                                                                                       
    |   in this expansion of `grammar!`                                                                                                                                                                                                       
...                                                                                                                                                                                                                                           
274 |       let g = grammar!{                                                                                                                                                                                                                 
    |  _____________-                                                                                                                                                                                                                         
275 |           foo => ("bar")                                                                                                                                                                                                                
276 | |     };                                                                                                                                                                                                                                
    | |_____- in this macro invocation

Since this is exactly the same code, modulo the addition of $crate::macros:: in various places, I don't understand how there can be a borrowing problem.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment