Last active
December 25, 2015 10:58
-
-
Save elizarov/6965053 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.BitSet; | |
/** | |
* @author Roman Elizarov | |
*/ | |
public class Regex { | |
private final int n; // number of actual chars in pattern (w/o *) | |
private final char[] cs; // actual chars in pattens (w/o *) | |
private final boolean[] rs; // true when * applies | |
private final int[] fs; // transitive closure of match, given that an empty string matches x* | |
public Regex(String p) { | |
cs = new char[p.length()]; | |
rs = new boolean[p.length()]; | |
int n = 0; | |
for (int i = 0; i < p.length(); i++) { | |
char c = p.charAt(i); | |
if (n > 0 && c == '*') | |
rs[n - 1] = true; | |
else | |
cs[n++] = c; | |
} | |
fs = new int[n + 1]; | |
for (int i = 0; i <= n; i++) { | |
int f = i; | |
while (f < n && rs[f]) | |
f++; | |
fs[i] = f + 1; | |
} | |
this.n = n; | |
} | |
public boolean matches(String s) { | |
BitSet m0 = new BitSet(n + 1); | |
BitSet m1 = new BitSet(n + 1); | |
m0.set(0, fs[0]); | |
for (int i = 0; i < s.length(); i++) { | |
char c = s.charAt(i); | |
m1.clear(0, n + 2); | |
for (int j = m0.nextSetBit(0); j >= 0; j = m0.nextSetBit(j + 1)) { | |
if (j < n && matchCharAt(c, j)) | |
m1.set(j + 1, fs[j + 1]); | |
if (j > 0 && rs[j - 1] && matchCharAt(c, j - 1)) | |
m1.set(j, fs[j]); | |
} | |
BitSet t = m0; | |
m0 = m1; | |
m1 = t; | |
} | |
return m0.get(n); | |
} | |
private boolean matchCharAt(char c, int j) { | |
return cs[j] == c || cs[j] == '.'; | |
} | |
// ----- actual code ends, tests below ----- | |
public static void main(String[] args) { | |
mustMatch("", ""); | |
mustNotMatch("", "a"); | |
mustMatch("a", "a*"); | |
mustMatch("a", ".*"); | |
mustMatch("", ".*"); | |
mustMatch("", "a*"); | |
mustNotMatch("b", "a*"); | |
mustMatch("b", ".*"); | |
mustMatch("b", "ba*"); | |
mustMatch("abba", "abba"); | |
mustNotMatch("", "."); | |
mustMatch("a", "."); | |
mustMatch("ac", "ab*c"); | |
mustMatch("abbbc", "ab*c"); | |
mustNotMatch("abbbc", "a.*b"); | |
mustMatch("abbbc", "a.*c"); | |
mustMatch("ac", ".*c"); | |
mustMatch("aac", "a*b*ac"); | |
mustMatch("ababc", ".*bc"); // test for a bug in previous rev | |
mustMatch("bbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaa", "b*a*b*a*b*a*b*a*b*a*"); | |
} | |
private static void mustMatch(String s, String p) { | |
assert new Regex(p).matches(s); | |
} | |
private static void mustNotMatch(String s, String p) { | |
assert !new Regex(p).matches(s); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment