Last active
October 28, 2018 18:37
-
-
Save dfhoughton/6188e187149da9dc70a0e09cd02f6667 to your computer and use it in GitHub Desktop.
proof of concept grammar macro
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Macro for converting a declarative expression such as
//
//     grammar!{
//         (?bBw)
//         larry => <bob>{5} | <bar>{3,}
//         larry -> ("this")? <bar>* ("that") <bob>+
//         bob   => r("foo")
//         bar   => (?i) [&v]
//     };
//
// into a `PseudoGrammar` (the proof-of-concept stand-in for a pidgin::Grammar).
//
// Syntax accepted:
//   * an optional leading `(?on)`, `(?-off)` or `(?on-off)` group sets the
//     flags common to all rules;
//   * `name => parts` starts a rule (or adds an alternate to an existing one);
//     `name -> parts` does the same and sets `add_space` on the rule's flags;
//     either arrow may be followed by a flag group for that rule alone;
//   * `<rule>` becomes `Part::G`, `("string")` becomes `Part::S`, `[expr]`
//     (an expression usable as `&[&str]`) becomes `Part::V`, and `r(expr)`
//     becomes `Part::R`;
//   * `<..>`, `(..)` and `[..]` take an optional repetition suffix `?`, `*`,
//     `+`, `{n}`, `{n,}` or `{n,m}`; every suffix except `{n}` may be followed
//     by `?` to mark it stingy; `r(..)` takes no suffix;
//   * `|` starts a new alternate of the current rule with the same flags.
//
// Leftover tokens that match none of the above are reported at compile time.
// A part or `|` that appears before any rule name panics at run time.
macro_rules! grammar {
    // Common state has been set; proceed to recursively nibble away bits of
    // the grammar.
    //   l: the name from every rule header, in source order, so that its last
    //      entry is always the rule currently being built
    //   m: rule name -> alternates, each alternate a (flags, parts) pair
    ( @initialized $mflags:expr, $($rest:tt)+ ) => ({
        let mut l: Vec<String> = Vec::new();
        let mut m: ::std::collections::HashMap<String, Vec<(MacroFlags, Vec<Part>)>> =
            ::std::collections::HashMap::new();
        grammar!(@rules l, m, $($rest)+ );
        // A rule that was re-opened appears in `l` more than once; keep only
        // the first mention so the list is the order of first definition.
        let mut seen: ::std::collections::HashSet<String> = ::std::collections::HashSet::new();
        l.retain(|name| seen.insert(name.clone()));
        PseudoGrammar::new(l, m, $mflags)
    });

    // The general way to start a new rule (or a new alternate of an old one).
    // The name is always pushed so that `l.last()` names the rule under
    // construction even when an earlier rule is re-opened. No reference into
    // `m` outlives its statement, so the recursive expansion below can borrow
    // `m` again.
    (@start_rule $l:expr, $m:expr, $name:ident, $mf:expr, $($rest:tt)+) => (
        let name = stringify!($name).to_string();
        $l.push(name.clone());
        $m.entry(name).or_insert_with(Vec::new).push(($mf, Vec::new()));
        grammar!(@rules $l, $m, $($rest)+)
    );

    // Extract the rule name and any flags particular to that rule.
    // `=>` rules don't mess with space.
    (@rules $l:expr, $m:expr, $name:ident => (?$on:ident-$off:ident) $($rest:tt)+) => (
        let new_flags = MacroFlags::from_strings(stringify!($on), stringify!($off));
        grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
    );
    (@rules $l:expr, $m:expr, $name:ident => (?$on:ident) $($rest:tt)+) => (
        let new_flags = MacroFlags::from_strings(stringify!($on), "");
        grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
    );
    (@rules $l:expr, $m:expr, $name:ident => (?-$off:ident) $($rest:tt)+) => (
        let new_flags = MacroFlags::from_strings("", stringify!($off));
        grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
    );
    (@rules $l:expr, $m:expr, $name:ident => $($rest:tt)+) => (
        let new_flags = MacroFlags::defaults();
        grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
    );

    // `->` rules allow optional space between tokens.
    (@rules $l:expr, $m:expr, $name:ident -> (?$on:ident-$off:ident) $($rest:tt)+) => (
        let mut new_flags = MacroFlags::from_strings(stringify!($on), stringify!($off));
        new_flags.add_space = true;
        grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
    );
    (@rules $l:expr, $m:expr, $name:ident -> (?$on:ident) $($rest:tt)+) => (
        let mut new_flags = MacroFlags::from_strings(stringify!($on), "");
        new_flags.add_space = true;
        grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
    );
    (@rules $l:expr, $m:expr, $name:ident -> (?-$off:ident) $($rest:tt)+) => (
        let mut new_flags = MacroFlags::from_strings("", stringify!($off));
        new_flags.add_space = true;
        grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
    );
    (@rules $l:expr, $m:expr, $name:ident -> $($rest:tt)+) => (
        let mut new_flags = MacroFlags::defaults();
        new_flags.add_space = true;
        grammar!(@start_rule $l, $m, $name, new_flags, $($rest)+)
    );

    // General method for adding a rule part: append it to the last alternate
    // of the rule currently being built. Done in a single statement so the
    // mutable borrow of `m` ends before the recursive expansion.
    ( @add_part $l:expr, $m:expr, $p:expr, $($parts:tt)* ) => (
        $m.get_mut($l.last().expect("grammar part given before any rule name"))
            .expect("the current rule is always registered")
            .last_mut()
            .expect("a registered rule always has at least one alternate")
            .1
            .push($p);
        grammar!(@rules $l, $m, $($parts)*)
    );

    // General rule for adding a <rule>; the repetition suffix is optional.
    ( @add_grammar $l:expr, $m:expr, $e:ident, $low:expr, $high:expr, $stingy:expr, $($parts:tt)* ) => (
        grammar!(
            @add_part
            $l,
            $m,
            Part::G(stringify!($e).to_string(), $low, $high, $stingy),
            $($parts)*
        )
    );
    ( @rules $l:expr, $m:expr, <$e:ident>?? $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, None, Some(1), true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>? $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, None, Some(1), false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>*? $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, Some(0), None, true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>* $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, Some(0), None, false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>+? $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, Some(1), None, true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>+ $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, Some(1), None, false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>{$low:expr,$high:expr}? $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, Some($low), Some($high), true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>{$low:expr,$high:expr} $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, Some($low), Some($high), false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>{$low:expr,}? $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, Some($low), None, true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>{$low:expr,} $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, Some($low), None, false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident>{$n:expr} $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, Some($n), Some($n), false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, <$e:ident> $($parts:tt)* ) => (
        grammar!(@add_grammar $l, $m, $e, None, None, false, $($parts)*)
    );

    // General rule for adding a (rule).
    // (string) introduces a single leaf; the repetition suffix is optional.
    ( @add_string $l:expr, $m:expr, $e:expr, $low:expr, $high:expr, $stingy:expr, $($parts:tt)* ) => (
        grammar!(
            @add_part
            $l,
            $m,
            Part::S($e.to_string(), $low, $high, $stingy),
            $($parts)*
        )
    );
    ( @rules $l:expr, $m:expr, ($e:expr)?? $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, None, Some(1), true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr)? $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, None, Some(1), false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr)*? $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, Some(0), None, true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr)* $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, Some(0), None, false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr)+? $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, Some(1), None, true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr)+ $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, Some(1), None, false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr){$low:expr,$high:expr}? $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, Some($low), Some($high), true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr){$low:expr,$high:expr} $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, Some($low), Some($high), false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr){$low:expr,}? $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, Some($low), None, true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr){$low:expr,} $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, Some($low), None, false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr){$n:expr} $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, Some($n), Some($n), false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, ($e:expr) $($parts:tt)* ) => (
        grammar!(@add_string $l, $m, $e, None, None, false, $($parts)*)
    );

    // General rule for adding a [rule].
    // [leaves] ingests a list of leaves; the repetition suffix is optional.
    ( @add_vec $l:expr, $m:expr, $e:expr, $low:expr, $high:expr, $stingy:expr, $($parts:tt)* ) => (
        let terms: &[&str] = $e;
        grammar!(
            @add_part
            $l,
            $m,
            Part::V(terms.iter().map(|s| s.to_string()).collect(), $low, $high, $stingy),
            $($parts)*
        )
    );
    ( @rules $l:expr, $m:expr, [$e:expr]?? $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, None, Some(1), true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]? $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, None, Some(1), false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]*? $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, Some(0), None, true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]* $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, Some(0), None, false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]+? $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, Some(1), None, true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]+ $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, Some(1), None, false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]{$low:expr,$high:expr}? $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, Some($low), Some($high), true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]{$low:expr,$high:expr} $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, Some($low), Some($high), false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]{$low:expr,}? $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, Some($low), None, true, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]{$low:expr,} $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, Some($low), None, false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr]{$n:expr} $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, Some($n), Some($n), false, $($parts)*)
    );
    ( @rules $l:expr, $m:expr, [$e:expr] $($parts:tt)* ) => (
        grammar!(@add_vec $l, $m, $e, None, None, false, $($parts)*)
    );

    // r(pattern) allows the introduction of an externally generated regex;
    // no repetition suffix.
    ( @rules $l:expr, $m:expr, r($e:expr) $($parts:tt)* ) => (
        grammar!(@add_part $l, $m, Part::R($e.to_string()), $($parts)*)
    );

    // `|` provides an alternate way to define an alternate (other than
    // repeating a rule); the new alternate inherits the flags of the one
    // before it.
    ( @rules $l:expr, $m:expr, | $($parts:tt)* ) => (
        let flags = $m
            .get($l.last().expect("`|` given before any rule name"))
            .expect("the current rule is always registered")
            .last()
            .expect("a registered rule always has at least one alternate")
            .0
            .clone();
        $m.get_mut($l.last().expect("`|` given before any rule name"))
            .expect("the current rule is always registered")
            .push((flags, Vec::new()));
        grammar!(@rules $l, $m, $($parts)*)
    );

    // Every token tree has been consumed: nothing left to do.
    ( @rules $l:expr, $m:expr, ) => ();

    // Basement rule that should never see any token trees. If we hit this,
    // either the macro rules don't cover all cases or the conventions of the
    // macro rules have not been followed, so fail the build and show what was
    // left over.
    ( @rules $l:expr, $m:expr, $($parts:tt)+ ) => (
        compile_error!(concat!("unused token tree: ", stringify!($($parts)+)));
    );

    // Rules that match initially -- they define any flags common to all rules.
    ( (?$on:ident) $($rest:tt)+ ) => ({
        let mut mflags = MacroFlags::defaults();
        mflags.set(stringify!($on), "");
        grammar!(@initialized mflags, $($rest)+ )
    });
    ( (?-$off:ident) $($rest:tt)+ ) => ({
        let mut mflags = MacroFlags::defaults();
        mflags.set("", stringify!($off));
        grammar!(@initialized mflags, $($rest)+ )
    });
    ( (?$on:ident-$off:ident) $($rest:tt)+ ) => ({
        let mut mflags = MacroFlags::defaults();
        mflags.set(stringify!($on), stringify!($off));
        grammar!(@initialized mflags, $($rest)+ )
    });
    ( $($rest:tt)+ ) => ({
        let mflags = MacroFlags::defaults();
        grammar!(@initialized mflags, $($rest)+ )
    });
}
fn main() { | |
let v = vec!["baz", "plugh"]; | |
let g = grammar!{ | |
(?bBw) | |
larry => <bob>{5} | <bar>{3,} | |
larry -> ("this")? <bar>* ("that") <bob>+ | |
bob => r("foo") | |
bar => (?i) [&v] | |
}; | |
g.display(); | |
let g = grammar!{ | |
foo => ("bar") | |
}; | |
g.display(); | |
} | |
#[doc(hidden)] | |
pub struct PseudoGrammar { | |
l: Vec<String>, | |
m: std::collections::HashMap<String,Vec<(MacroFlags,Vec<Part>)>>, | |
macro_flags: MacroFlags, | |
} | |
impl PseudoGrammar { | |
pub fn new(l: Vec<String>, m: std::collections::HashMap<String,Vec<(MacroFlags,Vec<Part>)>>, macro_flags: MacroFlags) -> PseudoGrammar { | |
PseudoGrammar{l, m, macro_flags} | |
} | |
pub fn display(&self) { | |
println!("\n{}", self.macro_flags); | |
for rule in self.l.clone() { | |
print!("{} := ", &rule); | |
println!("{:?}", self.m.get(&rule).unwrap()); | |
} | |
} | |
} | |
/// One element of a rule alternate, as recorded by the `grammar!` macro.
///
/// Except for `R`, each variant carries the repetition the macro parsed as
/// `(low, high, stingy)`: the optional lower bound, the optional upper bound,
/// and whether the repetition suffix was marked stingy with a trailing `?`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[doc(hidden)]
pub enum Part {
    /// `r(pattern)`: the pattern text; takes no repetition.
    R(String),
    /// `[leaves]`: the list of leaves.
    V(Vec<String>, Option<usize>, Option<usize>, bool),
    /// `<rule>`: the name of the referenced rule.
    G(String, Option<usize>, Option<usize>, bool),
    /// `(string)`: a single leaf.
    S(String, Option<usize>, Option<usize>, bool),
}
/// Flags attached to a whole grammar or to a single rule alternate.
///
/// Each private field is switched by the one-letter code noted beside it (see
/// [`MacroFlags::set`]); `add_space` is set directly by the `grammar!` macro
/// for `->` rules.
#[derive(Clone)]
#[doc(hidden)]
pub struct MacroFlags {
    case_insensitive: bool, // i
    multi_line: bool,       // m
    unicode: bool,          // u
    reverse_greed: bool,    // U
    dot_all: bool,          // s
    whitespace_some: bool,  // w
    whitespace_maybe: bool, // W
    word_left: bool,        // b
    word_right: bool,       // B
    pub add_space: bool,
}

impl MacroFlags {
    /// Every flag off except `unicode`, which starts on.
    pub fn defaults() -> MacroFlags {
        MacroFlags {
            case_insensitive: false,
            multi_line: false,
            unicode: true,
            reverse_greed: false,
            dot_all: false,
            whitespace_some: false,
            whitespace_maybe: false,
            word_left: false,
            word_right: false,
            add_space: false,
        }
    }

    /// The defaults with the letters in `on` switched on and then the letters
    /// in `off` switched off.
    ///
    /// # Panics
    ///
    /// Panics on a letter that is not a known flag.
    pub fn from_strings(on: &str, off: &str) -> MacroFlags {
        let mut mf = MacroFlags::defaults();
        mf.set(on, off);
        mf
    }

    /// Switches on every flag named in `on`, then switches off every flag
    /// named in `off`.
    ///
    /// `w` and `W` are mutually exclusive: switching one on switches the other
    /// off, while switching one off leaves the other untouched.
    ///
    /// # Panics
    ///
    /// Panics with `unfamiliar flag: <c>` on a letter that is not a known flag.
    pub fn set(&mut self, on: &str, off: &str) {
        // `on` is applied before `off`, so a letter present in both ends up off
        for &(letters, enable) in &[(on, true), (off, false)] {
            for c in letters.chars() {
                match c {
                    'i' => self.case_insensitive = enable,
                    'm' => self.multi_line = enable,
                    'u' => self.unicode = enable,
                    'U' => self.reverse_greed = enable,
                    's' => self.dot_all = enable,
                    'w' => {
                        self.whitespace_some = enable;
                        if enable {
                            self.whitespace_maybe = false;
                        }
                    }
                    'W' => {
                        self.whitespace_maybe = enable;
                        if enable {
                            self.whitespace_some = false;
                        }
                    }
                    'b' => self.word_left = enable,
                    'B' => self.word_right = enable,
                    _ => panic!("unfamiliar flag: {}", c),
                }
            }
        }
    }
}

impl Default for MacroFlags {
    /// Same as [`MacroFlags::defaults`].
    fn default() -> MacroFlags {
        MacroFlags::defaults()
    }
}

impl std::fmt::Display for MacroFlags {
    /// Writes `(?...)` containing the letter of every flag that is on, in the
    /// fixed order `i m s U b B w W`, then `a` when `add_space` is set, then
    /// `-u` when `unicode` is off.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let switches = [
            (self.case_insensitive, "i"),
            (self.multi_line, "m"),
            (self.dot_all, "s"),
            (self.reverse_greed, "U"),
            (self.word_left, "b"),
            (self.word_right, "B"),
            (self.whitespace_some, "w"),
            (self.whitespace_maybe, "W"),
            (self.add_space, "a"),
            // unicode is the only flag reported when it is *off*
            (!self.unicode, "-u"),
        ];
        f.write_str("(?")?;
        for &(shown, text) in switches.iter() {
            if shown {
                f.write_str(text)?;
            }
        }
        f.write_str(")")
    }
}

impl std::fmt::Debug for MacroFlags {
    /// Debug output is the same compact `(?...)` form as `Display`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        std::fmt::Display::fmt(self, f)
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This code runs without problems. Its output is
When I copy it into the pidgin project in src/macros.rs, export it in src/lib.rs via [snippet omitted], and then invoke it in a test in tests/macros.rs like so [snippet omitted], I get a "cannot borrow `m` as mutable more than once at a time" error during compilation. Here's a detailed description of the error:
Since this is exactly the same code, modulo the addition of `$crate::macros::` in various places, I don't understand how there can be a borrowing problem.