Last active
August 29, 2015 14:15
-
-
Save jimtla/712ab5462dfc0548a897 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(collections)] | |
#![feature(box_patterns)] | |
#[derive(Clone, Debug)] | |
enum Regex { | |
Term(RegexTerm), | |
Or(RegexTerm, Box<Regex>), | |
} | |
impl Regex { | |
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool { | |
match self { | |
&Term(ref term) => term.matches(target, matches_rest), | |
&Or(ref term, box ref r) => | |
term.matches(target, matches_rest) || r.matches(target, matches_rest), | |
} | |
} | |
} | |
#[derive(Clone, Debug)] | |
enum RegexTerm { | |
Empty, | |
Cons(RegexFactor, Box<RegexTerm>), | |
} | |
impl RegexTerm { | |
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool { | |
match self { | |
&Empty => matches_rest(target), | |
&Cons(ref factor, box ref term) => { | |
factor.matches(target, &|rest| { | |
term.matches(rest, matches_rest) | |
}) | |
} | |
} | |
} | |
} | |
#[derive(Clone, Debug)] | |
enum RegexFactor { | |
Star(RegexBase), | |
Plus(RegexBase), | |
Once(RegexBase), | |
} | |
impl RegexFactor { | |
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool { | |
match self { | |
&Star(ref base) => { | |
// b*rest == bb*rest|rest | |
matches_rest(target) || | |
base.matches(target, &|rest| { | |
Star(base.clone()).matches(rest, matches_rest) | |
}) | |
}, | |
&Plus(ref base) => { | |
// b+ == bb* | |
base.matches(target, &|rest| { | |
Star(base.clone()).matches(rest, matches_rest) | |
}) | |
}, | |
&Once(ref base) => base.matches(target, matches_rest), | |
} | |
} | |
} | |
#[derive(Clone, Debug)] | |
enum RegexBase { | |
Paren(Box<Regex>), | |
Char(char), | |
Any(String), | |
Dot, | |
} | |
impl RegexBase { | |
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool { | |
match (self, target.slice_shift_char()) { | |
(&Paren(box ref regex), _) => | |
regex.matches(target, matches_rest), | |
(&Char(c), Some((tc, rest))) if tc == c => matches_rest(rest), | |
(&Any(ref chars), Some((tc, rest))) | |
if chars.contains_char(tc) => matches_rest(rest), | |
(&Dot, Some((_, rest))) => matches_rest(rest), | |
_ => false | |
} | |
} | |
} | |
use Regex::{Term, Or}; | |
use RegexTerm::{Empty, Cons}; | |
use RegexFactor::{Star, Plus, Once}; | |
use RegexBase::{Char, Dot, Any, Paren}; | |
fn parse_regex(regex : &str) -> (Regex, &str) { | |
let (term, rest) = parse_term(regex); | |
if rest.len() > 0 && rest.char_at(0) == '|' { | |
let (r, rest2) = parse_regex(&rest[0..]); | |
(Or(term, Box::new(r)), rest2) | |
} else if rest.len() > 0 { | |
panic!("Malformed regex {}", rest) | |
} else { | |
(Term(term), rest) | |
} | |
} | |
fn parse_term(term : &str) -> (RegexTerm, &str) { | |
if term.len() == 0 { | |
(Empty, "") | |
} else { | |
let (factor, rest) = parse_factor(term); | |
let (term, rest2) = parse_term(rest); | |
(Cons(factor, Box::new(term)), rest2) | |
} | |
} | |
fn parse_factor(factor : &str) -> (RegexFactor, &str) { | |
let (base, rest) = parse_base(factor); | |
match rest.slice_shift_char() { | |
Some(('*', _)) => (Star(base), &rest[1..]), | |
Some(('+', _)) => (Plus(base), &rest[1..]), | |
_ => (Once(base), rest), | |
} | |
} | |
fn parse_base(factor : &str) -> (RegexBase, &str) { | |
match factor.slice_shift_char() { | |
Some(('.', rest)) => { | |
(Dot, rest) | |
}, | |
Some(('[', rest)) => { | |
let ind = rest.find(']').expect("Mismatched '['"); | |
(Any(String::from_str(&rest[..ind])), &rest[ind+1..]) | |
}, | |
Some(('(', rest)) => { | |
let (x, r) = parse_regex(rest); | |
(Paren(Box::new(x)), r) | |
}, | |
Some((c, rest)) => { | |
(Char(c), rest) | |
}, | |
None => panic!("Malformed regex... parsing empty as base"), | |
} | |
} | |
pub fn matches(regex : &str], target : &str) -> bool { | |
let (regex, rest) = parse_regex(regex); | |
if rest != "" { | |
panic!("Malformed regex {:?}", regex) | |
} | |
regex.matches(target, &(|rest| rest.len() == 0)) | |
} | |
#[test] | |
fn test_matches() { | |
assert!(matches("asdf", "asdf")); | |
assert!(!matches("asdf", "asdfg")); | |
assert!(!matches("asdf", "")); | |
assert!(!matches("asdf", "foo")); | |
assert!(!matches("b", "a")); | |
assert!(matches(".", "f")); | |
assert!(matches(".", "a")); | |
assert!(matches(".", "a")); | |
assert!(matches(".*", "")); | |
assert!(matches(".*", "anything")); | |
assert!(matches("a*", "aaa")); | |
assert!(matches("a*", "aaaaaa")); | |
assert!(matches("a*", "a")); | |
assert!(matches("a*", "")); | |
assert!(matches("b*", "bbbb")); | |
assert!(matches("aab*", "aabbbb")); | |
assert!(matches("aab*", "aa")); | |
assert!(matches(".*@.*", "trey@jellolabs.com")); | |
assert!(matches(".*@.*", "@")); | |
assert!(matches("a.c", "abc")); | |
assert!(matches("a.c", "a1c")); | |
assert!(!matches("a.c", "ac")); | |
assert!(matches("a*a", "a")); | |
assert!(matches("a*a", "aa")); | |
assert!(matches("a*a", "aaaaaaa")); | |
assert!(!matches("a*a", "")); | |
assert!(matches(".*abc.*", "ababc")); | |
assert!(matches(".*abc.*", "111abc222")); | |
assert!(matches(".*abc.*", "abcab")); | |
} | |
#[test] | |
#[cfg(feature = "parsed")] | |
fn test_extended_features() { | |
assert!(matches("[abc]", "a")); | |
assert!(matches("[abc]", "b")); | |
assert!(matches("[abc]*", "baccab")); | |
assert!(matches("[abc]+", "baccab")); | |
assert!(!matches("[abc]+", "")); | |
assert!(matches("[abc]+xx", "abaxx")); | |
assert!(matches("(abc)+", "abc")); | |
assert!(matches("(abc)+", "abcabc")); | |
assert!(matches("(ab+c)+", "abbbbcabbc")); | |
assert!(matches("abc|d", "abc")); | |
assert!(matches("abc|d", "d")); | |
assert!(matches("(a*|d)e", "de")); | |
assert!(matches("(a*|d)e", "aaae")); | |
assert!(!matches("(a*|d)e", "aaade")); | |
assert!(!matches("(a*|d)e", "aaa")); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment