Skip to content

Instantly share code, notes, and snippets.

@jimtla
Created February 21, 2015 17:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jimtla/acbc4ce933dd47514f1d to your computer and use it in GitHub Desktop.
Save jimtla/acbc4ce933dd47514f1d to your computer and use it in GitHub Desktop.
#![feature(collections)]
#![feature(box_patterns)]
#[cfg(feature = "simple")]
pub fn matches(regex : &str, target : &str) -> bool {
match (regex.slice_shift_char(), target.slice_shift_char()) {
// Both strings are emtpy, so we've got a match.
(None, None) => true,
// We're looking at _*, match the kleene star.
(Some((repeater, reg)), _)
if reg.len() > 0 && reg.char_at(0) == '*' => {
// a*rest is equivalent to (aa*rest)|(rest). r is aa*rest, and then
// we check rest separately
let mut r = String::from_str(regex);
r.insert(0, repeater);
matches(&r[..], target) || matches(&reg[1..], target)
},
// . matches any single character
(Some(('.', reg)), Some((_, tar))) => matches(reg, tar),
// Anything else mathces only an identical character
(Some((c, reg)), Some((tc, tar))) if c == tc => matches(reg, tar),
// If nothing matches, we're done.
_ => false,
}
}
#[derive(Clone, Debug)]
enum Regex {
Term(RegexTerm),
Or(RegexTerm, Box<Regex>),
}
impl Regex {
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool {
match self {
&Term(ref term) => term.matches(target, matches_rest),
&Or(ref term, box ref r) =>
term.matches(target, matches_rest) || r.matches(target, matches_rest),
}
}
}
#[derive(Clone, Debug)]
enum RegexTerm {
Empty,
Cons(RegexFactor, Box<RegexTerm>),
}
impl RegexTerm {
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool {
match self {
&Empty => matches_rest(target),
&Cons(ref factor, box ref term) => {
factor.matches(target, &|rest| {
term.matches(rest, matches_rest)
})
}
}
}
}
#[derive(Clone, Debug)]
enum RegexFactor {
Star(RegexBase),
Plus(RegexBase),
Once(RegexBase),
}
impl RegexFactor {
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool {
match self {
&Star(ref base) => {
// b*rest == bb*rest|rest
matches_rest(target) ||
base.matches(target, &|rest| {
Star(base.clone()).matches(rest, matches_rest)
})
},
&Plus(ref base) => {
// b+ == bb*
base.matches(target, &|rest| {
Star(base.clone()).matches(rest, matches_rest)
})
},
&Once(ref base) => base.matches(target, matches_rest),
}
}
}
#[derive(Clone, Debug)]
enum RegexBase {
Paren(Box<Regex>),
Char(char),
Any(String),
Dot,
}
impl RegexBase {
pub fn matches<F: Fn(&str) -> bool>(&self, target : &str, matches_rest : &F) -> bool {
match (self, target.slice_shift_char()) {
(&Paren(box ref regex), _) =>
regex.matches(target, matches_rest),
(&Char(c), Some((tc, rest))) if tc == c => matches_rest(rest),
(&Any(ref chars), Some((tc, rest)))
if chars.contains_char(tc) => matches_rest(rest),
(&Dot, Some((_, rest))) => matches_rest(rest),
_ => false
}
}
}
use Regex::{Term, Or};
use RegexTerm::{Empty, Cons};
use RegexFactor::{Star, Plus, Once};
use RegexBase::{Char, Dot, Any, Paren};
fn parse_regex(regex : &str) -> (Regex, &str) {
let (term, rest) = parse_term(regex);
if rest.len() > 0 && rest.char_at(0) == '|' {
let (r, rest2) = parse_regex(&rest[0..]);
(Or(term, Box::new(r)), rest2)
} else if rest.len() > 0 {
panic!("Malformed regex {}", rest)
} else {
(Term(term), rest)
}
}
fn parse_term(term : &str) -> (RegexTerm, &str) {
if term.len() == 0 {
(Empty, "")
} else {
let (factor, rest) = parse_factor(term);
let (term, rest2) = parse_term(rest);
(Cons(factor, Box::new(term)), rest2)
}
}
fn parse_factor(factor : &str) -> (RegexFactor, &str) {
let (base, rest) = parse_base(factor);
match rest.slice_shift_char() {
Some(('*', _)) => (Star(base), &rest[1..]),
Some(('+', _)) => (Plus(base), &rest[1..]),
_ => (Once(base), rest),
}
}
fn parse_base(factor : &str) -> (RegexBase, &str) {
match factor.slice_shift_char() {
Some(('.', rest)) => {
(Dot, rest)
},
Some(('[', rest)) => {
let ind = rest.find(']').expect("Mismatched '['");
(Any(String::from_str(&rest[..ind])), &rest[ind+1..])
},
Some(('(', rest)) => {
let (x, r) = parse_regex(rest);
(Paren(Box::new(x)), r)
},
Some((c, rest)) => {
(Char(c), rest)
},
None => panic!("Malformed regex... parsing empty as base"),
}
}
#[cfg(feature = "parsed")]
pub fn matches(regex : &str, target : &str) -> bool {
let (regex, rest) = parse_regex(regex);
if rest != "" {
panic!("Malformed regex {:?}", regex)
}
regex.matches(target, &(|rest| rest.len() == 0))
}
#[test]
fn test_matches() {
assert!(matches("asdf", "asdf"));
assert!(!matches("asdf", "asdfg"));
assert!(!matches("asdf", ""));
assert!(!matches("asdf", "foo"));
assert!(!matches("b", "a"));
assert!(matches(".", "f"));
assert!(matches(".", "a"));
assert!(matches(".", "a"));
assert!(matches(".*", ""));
assert!(matches(".*", "anything"));
assert!(matches("a*", "aaa"));
assert!(matches("a*", "aaaaaa"));
assert!(matches("a*", "a"));
assert!(matches("a*", ""));
assert!(matches("b*", "bbbb"));
assert!(matches("aab*", "aabbbb"));
assert!(matches("aab*", "aa"));
assert!(matches(".*@.*", "trey@jellolabs.com"));
assert!(matches(".*@.*", "@"));
assert!(matches("a.c", "abc"));
assert!(matches("a.c", "a1c"));
assert!(!matches("a.c", "ac"));
assert!(matches("a*a", "a"));
assert!(matches("a*a", "aa"));
assert!(matches("a*a", "aaaaaaa"));
assert!(!matches("a*a", ""));
assert!(matches(".*abc.*", "ababc"));
assert!(matches(".*abc.*", "111abc222"));
assert!(matches(".*abc.*", "abcab"));
}
#[test]
#[cfg(feature = "parsed")]
fn test_extended_features() {
assert!(matches("[abc]", "a"));
assert!(matches("[abc]", "b"));
assert!(matches("[abc]*", "baccab"));
assert!(matches("[abc]+", "baccab"));
assert!(!matches("[abc]+", ""));
assert!(matches("[abc]+xx", "abaxx"));
assert!(matches("(abc)+", "abc"));
assert!(matches("(abc)+", "abcabc"));
assert!(matches("(ab+c)+", "abbbbcabbc"));
assert!(matches("abc|d", "abc"));
assert!(matches("abc|d", "d"));
assert!(matches("(a*|d)e", "de"));
assert!(matches("(a*|d)e", "aaae"));
assert!(!matches("(a*|d)e", "aaade"));
assert!(!matches("(a*|d)e", "aaa"));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment