Skip to content

Instantly share code, notes, and snippets.

@brendanzab
Created October 16, 2012 06:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brendanzab/3897455 to your computer and use it in GitHub Desktop.
Save brendanzab/3897455 to your computer and use it in GitHub Desktop.
// based off the examples shown in this blog post:
// http://blog.rfw.name/2012/10/12/parser_combinators.html
enum ParseNode {
NonTerminal(&self/[ParseNode]),
Terminal(&self/str),
Empty,
}
enum ParseResult {
ParseOk(&self/ParseNode, &self/str),
NoParse,
}
enum Parser {
Concat (&self/[Parser]),
Alternate (&self/[Parser]),
OneOrMore (&self/Parser),
ZeroOrMore (&self/Parser),
Optional (&self/Parser),
CharRange (char, char),
Literal (&self/str),
}
/**
* Concat!(p1, p2 ... pn) => Concat(~[p1, p2 ... pn])
*/
macro_rules! Concat(
($($elem:expr),+) => (
Concat([ $($elem),+ ])
)
)
/**
* Alternate!(p1, p2 ... pn) => Alternate(~[p1, p2 ... pn])
*/
macro_rules! Alternate(
($($elem:expr),+) => (
Alternate([ $($elem),+ ])
)
)
impl Parser {
fn parse(src: &s/str) -> ParseResult {
match self {
Concat(arr) => concat(src, arr),
Alternate(arr) => alternate(src, arr),
OneOrMore(p) => one_or_more(src, p),
ZeroOrMore(p) => zero_or_more(src, p),
Optional(p) => optional(src, p),
CharRange(low, high) => char_range(src, low, high),
Literal(name) => literal(src, name)
}
}
}
/**
* Represents the concatenation of two parsers, i.e parse using the first
* parser, then the second, then return both results.
*/
fn concat(src: &s/str, parsers: &[Parser]) -> ParseResult {
let mut nodes = ~[];
let mut tail = src;
for parsers.each |p| {
match p.parse(tail) {
ParseOk(n, t) => {
nodes.push(*n);
tail = t;
}
NoParse => {
return NoParse;
}
}
}
return ParseOk(&NonTerminal(nodes), tail);
}
/**
* Represents the alternation of two parsers, i.e. parse using the first
* parser and, if that doesn't match, parse with the second parser.
*/
fn alternate(src: &s/str, parsers: &[Parser]) -> ParseResult {
for parsers.each |p| {
match p.parse(src) {
ParseOk(node, tail) => { return ParseOk(node, copy tail); }
NoParse => { loop; }
}
}
return NoParse;
}
/**
* Represents the Kleene star operation, i.e. attempt to parse one or more
* results using the given parser.
*/
fn one_or_more(src: &s/str, parser: &s/Parser) -> ParseResult {
let mut nodes = ~[];
let mut tail = src; // <- copy? ugh
loop {
match parser.parse(tail) {
ParseOk(node, tl) => {
nodes.push(*node);
tail = tl;
}
NoParse => {
break;
}
}
}
match nodes.is_not_empty() {
true => ParseOk(&NonTerminal(nodes), tail),
false => NoParse
}
}
/**
* Represents an optional one_or_more parser.
*/
fn zero_or_more(src: &s/str, parser: &s/Parser) -> ParseResult {
Optional(@OneOrMore(parser)).parse(src)
}
/**
* Represents a parser that doesn't fail if there is no match.
*/
fn optional(src: &s/str, parser: &s/Parser) -> ParseResult {
match parser.parse(src) {
ParseOk(node, tail) => ParseOk(node, tail),
NoParse => ParseOk(&Empty, str::from_slice(src))
}
}
/**
* Parse a single if it is in the given range
*/
fn char_range(src: &s/str, low: char, high: char) -> ParseResult {
if src.is_not_empty() {
let c = src[0] as char;
if c >= low && c <= high {
return ParseOk(&Terminal(str::from_char(c)), str::view(src, 1, src.len()));
}
}
return NoParse;
}
/**
* Parse a literal.
*/
fn literal(src: &s/str, name: &s/str) -> ParseResult {
match src.starts_with(name) {
true => ParseOk(&Terminal(name), src.slice(name.len(), src.len())),
false => NoParse
}
}
fn main() {
let match_ab = Concat!(Literal("a"), Literal("b"));
io::println(fmt!("match_ab.parse(\"ab\") -> %?", match_ab.parse("ab")));
let match_a_or_b = Alternate!(Literal("a"), Literal("b"));
io::println(fmt!("match_a_or_b.parse(\"a\") -> %?", match_a_or_b.parse("a")));
io::println(fmt!("match_a_or_b.parse(\"b\") -> %?", match_a_or_b.parse("b")));
io::println(fmt!("match_a_or_b.parse(\"c\") -> %?", match_a_or_b.parse("c")));
let match_a_plus = OneOrMore(&Literal("a"));
io::println(fmt!("match_ab.parse(\"aaab\") -> %?", match_a_plus.parse("aaab")));
let digit = CharRange('0', '9');
io::println(fmt!("digit.parse(\"3\") -> %?", digit.parse("3")));
io::println(fmt!("digit.parse(\"9\") -> %?", digit.parse("9")));
io::println(fmt!("digit.parse(\"a\") -> %?", digit.parse("a")));
let number = OneOrMore(&digit);
io::println(fmt!("number.parse(\"1234\") -> %?", number.parse("1234")));
io::println(fmt!("number.parse(\"abcd\") -> %?", number.parse("abcd")));
io::println(fmt!("number.parse(\"123abc\") -> %?", number.parse("123abc")));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment