Skip to content

Instantly share code, notes, and snippets.

@tecywiz121
Created August 9, 2013 13:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tecywiz121/6193721 to your computer and use it in GitHub Desktop.
Save tecywiz121/6193721 to your computer and use it in GitHub Desktop.
Java class that implements a state machine for converting glob-like patterns into regular expressions
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.File;
/**
 * Incrementally converts a glob-like pattern into a Java regular expression.
 *
 * <p>Characters are fed in with one of the {@code add} methods and drive a small
 * state machine; {@link #toRegex()} returns the regex for everything added so far.
 * The translation performed by the state methods is:
 * <ul>
 *   <li>a single {@code *} becomes "any run of characters that are not the
 *       platform file separator";</li>
 *   <li>two consecutive {@code *} become {@code .*};</li>
 *   <li>{@code ?} becomes {@code .};</li>
 *   <li>a backslash makes the following character literal;</li>
 *   <li>{@code . ( ) + | ^ $ @ %} are backslash-escaped in the output;</li>
 *   <li>every other character is copied to the output unchanged.</li>
 * </ul>
 *
 * <p>NOTE(review): unescaped brackets and braces are copied verbatim, so they keep
 * their regex meaning (character class / quantifier) in the result. That may or may
 * not be intended; confirm before relying on it.
 *
 * <p>Instances hold mutable parsing state (the regex built so far, the current
 * state and the current character).
 */
public final class GlobBuilder {
    /** Regex fragment emitted for a single {@code *}; built in the static initializer. */
    private static final String PATTERN_STAR;
    /** Regex fragment emitted for two consecutive {@code *}. */
    private static final String PATTERN_STAR_STAR = ".*";
    /** Regex fragment emitted for {@code ?}. */
    private static final String PATTERN_QUESTION = ".";

    static {
        String separator;
        if (File.separatorChar == '\\') {
            // A backslash must itself be escaped inside the regex character class.
            separator = "\\\\";
        } else {
            separator = File.separator;
        }
        PATTERN_STAR = "[^" + separator + "]*";
    }

    /** Regex text produced so far (without the closing anchor). */
    private final StringBuilder m_Regex = new StringBuilder();
    /** Number of the current state; selects which {@code _stateN} method runs next. */
    private int m_State = 0;
    /** Set by {@link #consume()} once the current character has been used up. */
    private boolean m_Consumed = false;
    /** Character currently being processed by the state machine. */
    private char m_Current;

    /**
     * Small demonstration: builds a regex from a fixed glob, prints it and reports
     * which of a few sample paths it matches (using {@code Matcher.find()}).
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        GlobBuilder gb = new GlobBuilder();
        gb.add("*\\\\*\\\\*\\\\test.html");
        try {
            System.out.println(gb.toRegex());
            Pattern p = Pattern.compile(gb.toRegex());
            String[] tests = new String[] {
                "test.png",
                "test.html",
                "subdir\\test.html",
                "red\\blue\\green\\test.html",
                "c:\\red\\blue\\green\\test.html",
            };
            for (String s : tests) {
                Matcher m = p.matcher(s);
                System.out.println(s + ": " + (m.find() ? "matches" : "no match"));
            }
        } catch (Exception e) {
            System.err.println(e.getMessage());
        }
    }

    /**
     * Runs the state method for the current state and stores the state it returns.
     *
     * <p>Dispatch is a plain switch on the state number; no reflection is involved.
     *
     * @throws IllegalStateException if the current state number has no state method
     */
    private void invokeState() {
        switch (m_State) {
        case 0:
            m_State = _state0();
            break;
        case 1:
            m_State = _state1();
            break;
        case 2:
            m_State = _state2();
            break;
        case 3:
            m_State = _state3();
            break;
        case 4:
            m_State = _state4();
            break;
        default:
            throw new IllegalStateException("Invalid internal state " + m_State);
        }
    }

    /**
     * Mark the character as consumed, meaning the state machine is ready to
     * accept another character.
     */
    private void consume() {
        m_Consumed = true;
    }

    /** @return the character currently being processed */
    private char cc() {
        return m_Current;
    }

    /** Appends {@code str} to the regex built so far. */
    private void append(String str) {
        m_Regex.append(str);
    }

    /** Appends {@code c} to the regex built so far. */
    private void append(char c) {
        m_Regex.append(c);
    }

    /** Appends the current character preceded by a backslash, making it literal in the regex. */
    private void appendEscapedCurrent() {
        append('\\');
        append(cc());
    }

    /**
     * Start state: nothing has been emitted yet.
     *
     * <p>IN -> (initial), OUT -> 1, 2. A leading {@code *} is consumed and deferred to
     * state 1; anything else emits the start anchor and is re-processed in state 2.
     *
     * @return the next state
     */
    protected int _state0() {
        switch (cc()) {
        case '*': // *
            consume();
            return 1;
        default:
            append('^');
            return 2;
        }
    }

    /**
     * The glob started with one {@code *}.
     *
     * <p>IN -> 0, OUT -> 2. A second {@code *} emits {@code .*} (no start anchor);
     * any other character emits the anchor plus the single-star pattern and is then
     * re-processed in state 2.
     *
     * @return the next state
     */
    protected int _state1() {
        switch (cc()) {
        case '*': // **
            consume();
            append(PATTERN_STAR_STAR);
            return 2;
        default: // *[^*]
            append('^');
            append(PATTERN_STAR);
            return 2;
        }
    }

    /**
     * Normal state: translates one ordinary glob character.
     *
     * <p>IN -> 0, 1, 2, 3, 4, OUT -> 2, 3, 4.
     *
     * @return the next state
     */
    protected int _state2() {
        switch (cc()) {
        // Escape next character
        case '\\':
            consume();
            return 3;
        // Regular character that needs to be escaped in the regex
        case '.':
        case '(':
        case ')':
        case '+':
        case '|':
        case '^':
        case '$':
        case '@':
        case '%':
            consume();
            appendEscapedCurrent();
            return 2;
        // Could be * or **; decided by the next character (or by toRegex at end of input)
        case '*':
            consume();
            return 4;
        case '?':
            consume();
            append(PATTERN_QUESTION);
            return 2;
        default:
            consume();
            append(cc());
            return 2;
        }
    }

    /**
     * The previous character was a backslash: emit the current one as a literal.
     *
     * <p>IN -> 2, OUT -> 2. Characters with a special meaning in the glob or in the
     * regex (including brackets and the hyphen, which are special in character
     * classes) are written with a backslash so they match themselves; all other
     * characters are written as they are.
     *
     * @return the next state
     */
    protected int _state3() {
        switch (cc()) {
        case ',':
        case '*':
        case '?':
        case '\\':
        case '{':
        case '}':
        case '[':
        case ']':
        case '-':
        case '.':
        case '(':
        case ')':
        case '+':
        case '|':
        case '^':
        case '$':
        case '@':
        case '%':
            consume();
            appendEscapedCurrent();
            return 2;
        default:
            consume();
            append(cc());
            return 2;
        }
    }

    /**
     * A {@code *} was seen in the middle of the glob.
     *
     * <p>IN -> 2, OUT -> 2. A second {@code *} emits {@code .*}; any other character
     * emits the single-star pattern and is then re-processed in state 2.
     *
     * @return the next state
     */
    protected int _state4() {
        switch (cc()) {
        case '*':
            consume();
            append(PATTERN_STAR_STAR);
            return 2;
        default:
            append(PATTERN_STAR);
            return 2;
        }
    }

    /**
     * Add the next character of the glob string.
     *
     * <p>State methods are run repeatedly until one of them consumes the character;
     * a state may emit output and hand the same character on to another state.
     *
     * @param cc the next glob character
     */
    public void add(char cc) {
        m_Consumed = false;
        m_Current = cc;
        while (!m_Consumed) {
            invokeState();
        }
    }

    /**
     * Adds every character of {@code str}, in order.
     *
     * @param str glob text to append
     */
    public void add(CharSequence str) {
        for (int ii = 0; ii < str.length(); ii++) {
            add(str.charAt(ii));
        }
    }

    /**
     * Adds every character of {@code str}, in order.
     *
     * @param str glob text to append
     */
    public void add(String str) {
        add((CharSequence) str);
    }

    /**
     * Returns the regular expression for the glob text added so far, closed with
     * an end anchor. The builder itself is not modified, so more text may be added
     * afterwards.
     *
     * @return the regular expression
     * @throws Exception if the glob ends with a backslash that escapes nothing
     */
    public String toRegex() throws Exception {
        StringBuilder sb = new StringBuilder(m_Regex);
        switch (m_State) {
        case 0:
            // Nothing was added: anchor the start so only the empty string matches.
            sb.append('^');
            break;
        case 1:
            // The whole glob is a single '*'.
            sb.append('^');
            sb.append(PATTERN_STAR);
            break;
        case 2:
            break;
        case 3:
            throw new Exception("unterminated escape");
        case 4:
            // The glob ends with a single '*' that is still pending; emit it now.
            sb.append(PATTERN_STAR);
            break;
        default:
            throw new IllegalStateException("Invalid internal state " + m_State);
        }
        sb.append('$');
        return sb.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment