briangordon/Regex challenge

## gistfile1.java
import java.util.ArrayList;
import java.util.List;

public class SimplifiedRegexMatcher {
	private SimplifiedRegexMatcher() {}

	public static boolean match(String regex, String needle) {
		// Our algorithm will fail if an open parenthesis is the first character, so add a character to the beginning of
		// both the regex and the needle.
		regex = '!' + regex;
		needle = '!' + needle;

		// Trailer characters
		regex = regex + '^';
		needle = needle + '^';

		// Keep track of a pointer to all possible current locations in the regex
		List<Integer> possibleRegexIndices = new ArrayList<>();

		// If the regex is empty then it can only match an empty string
		if(regex.equals("")) {
			return needle.equals("");
		}

		// First scan forward to the first letter in the regex
		int i = 0;
		while(regex.charAt(i) == '(') {
			i++;
		}

		// Start with the first letter in our possible states
		possibleRegexIndices.add(i);

		// Scan through the needle one character at a time
		for(int needle_index = 0; needle_index < needle.length() - 1; needle_index++) {
			// If we have no more options, then it must not have been a match.
			if(possibleRegexIndices.size() == 0) {
				return false;
			}

			char currentNeedle = needle.charAt(needle_index);

			List<Integer> nextPossibleRegexIndices = new ArrayList<>();
			for(Integer regex_index : possibleRegexIndices) {
				if(regex.charAt(regex_index) != currentNeedle) {
					continue;
				}

				// We know that we're pointing to a letter right now. What we do next depends on whether the next character
				// is a parenthesis or not.
				char nextRegexCharacter = regex.charAt(regex_index + 1);

				if(nextRegexCharacter == '(') {
					// Scan forwards to the next letter and add it
					int j = regex_index + 1;
					while(regex.charAt(j) == '(' || regex.charAt(j) == '*' || regex.charAt(j) == ')') {
						j++;
					}
					nextPossibleRegexIndices.add(j);

					// Scan forward to the matching end parenthesis
					j = regex_index + 2;
					int depth = 0;
					while(regex.charAt(j) != ')' || depth > 0) {
						if(regex.charAt(j) == '(') {
							depth++;
						} else if(regex.charAt(j) == ')') {
							depth--;
						}
						j++;
					}

					// Scan forwards to the next letter and add it
					while(regex.charAt(j) == '(' || regex.charAt(j) == '*' || regex.charAt(j) == ')') {
						j++;
					}
					nextPossibleRegexIndices.add(j);


				} else if(nextRegexCharacter == ')') {
					// Scan forwards to the next letter and add it
					int j = regex_index + 1;
					while(regex.charAt(j) == '(' || regex.charAt(j) == '*' || regex.charAt(j) == ')') {
						j++;

						// If we go off the end of the regex then this was clearly not a valid possibility
						if(j == regex.length()) {
							continue;
						}
					}
					nextPossibleRegexIndices.add(j);

					// Scan backwards to the opening parenthesis
					j = regex_index;
					int depth = 0;
					while(regex.charAt(j) != '(' || depth > 0) {
						if(regex.charAt(j) == ')') {
							depth++;
						} else if(regex.charAt(j) == '(') {
							depth--;
						}
						j--;
					}

					// Scan forwards to the next letter and add it
					while(regex.charAt(j) == '(' || regex.charAt(j) == '*' || regex.charAt(j) == ')') {
						j++;
					}
					nextPossibleRegexIndices.add(j);
				} else {
					// Just add the next letter
					nextPossibleRegexIndices.add(regex_index + 1);
				}
			}

			// Replace our current possible regex locations with the next pass
			possibleRegexIndices = nextPossibleRegexIndices;
		}

		// Return true iff one possible state is the terminating character
		return possibleRegexIndices.contains(regex.length() - 1);
	}

	private static void shouldMatch(String regex, String needle) {
		System.out.println("The regex " + regex + " should match the needle " + needle + " - " + (match(regex, needle) ? "OK" : "ERROR"));
	}

	private static void shouldNotMatch(String regex, String needle) {
		System.out.println("The regex " + regex + " should not match the needle " + needle + " - " + (!match(regex, needle) ? "OK" : "ERROR"));
	}


	public static void main(String[] args) {
		shouldMatch("abcd", "abcd");
		shouldMatch("(abc)*", "abc");
		shouldMatch("(abc)*(abc)*", "abcabc");
		shouldMatch("b(oo)*p", "bp");
		shouldMatch("(b(an)*a)*", "bananaba");
		shouldMatch("b(a(a)*)*", "b");
		shouldMatch("b(a(a)*)*", "ba");
		shouldMatch("b(a(a)*)*", "baa");

		shouldNotMatch("abcd", "abc");
		shouldNotMatch("abc", "abcd");
		shouldNotMatch("b(a(c)*a)*", "bacac");
	}
}

## gistfile2.java

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

// started 1:40
// ended 2:04
public class CheatingRegexMatcher {
	// Increase this to make failure cases rarer at the cost of performance.
	private static final int BRANCHING_FACTOR = 5;

	private static String replaceNTimes(String str, int startIdx, int endIdx, int n) {
		String firstPart = str.substring(0, startIdx);
		String endPart = str.substring(endIdx + 1, str.length());
		String multiplyPart = str.substring(startIdx + 1, endIdx - 1);

		StringBuilder sb = new StringBuilder(firstPart.length() + (endPart.length() * n) + multiplyPart.length());
		sb.append(firstPart);
		for(int i = 0; i < n; i++) {
			sb.append(multiplyPart);
		}
		sb.append(endPart);

		return sb.toString();
	}

	public static boolean match(String regex, String str) {
		if(regex.equals(str)) return true;
		if(!regex.contains("*")) return false;

		Deque<String> buffer = new ArrayDeque<>();
		buffer.add(regex);

		while(!buffer.isEmpty()) {
			String curString = buffer.remove();

			// Find the leftmost open parenthesis
			int j = 0;
			while(curString.charAt(j) != '(') {
				j++;
			}

			// Scan to the closing parenthesis
			int k = j + 1, depth = 0;
			while(curString.charAt(k) != ')' || depth > 0) {
				if(curString.charAt(k) == '(') {
					depth++;
				} else if(curString.charAt(k) == ')') {
					depth--;
				}
				k++;
			}

			// Go forward one to the asterisk
			k++;

			// Replace (abc)* with abc, abcabc, abcabcabc, abcabcabcabc, etc.
			for(int i = 0; i < BRANCHING_FACTOR; i++) {
				String replaced = replaceNTimes(curString, j, k, i);
				if(replaced.equals(str)) {
					return true;
				}
				if(replaced.contains("*")) {
					buffer.add(replaced);
				}
			}
		}

		return false;
	}

	private static void shouldMatch(String regex, String needle) {
		System.out.println("The regex " + regex + " should match the needle " + needle + " - " + (match(regex, needle) ? "OK" : "ERROR"));
	}

	private static void shouldNotMatch(String regex, String needle) {
		System.out.println("The regex " + regex + " should not match the needle " + needle + " - " + (!match(regex, needle) ? "OK" : "ERROR"));
	}


	public static void main(String[] args) {
		shouldMatch("abcd", "abcd");
		shouldMatch("(abc)*", "abc");
		shouldMatch("(abc)*(abc)*", "abcabc");
		shouldMatch("b(oo)*p", "bp");
		shouldMatch("(b(an)*a)*", "bananaba");
		shouldMatch("b(a(a)*)*", "b");
		shouldMatch("b(a(a)*)*", "ba");
		shouldMatch("b(a(a)*)*", "baa");

		// This is going to fail because it requires too much expansion. That's the downside of cheating.
		shouldMatch("(a)*", "aaaaaaaaaaa");

		shouldNotMatch("abcd", "abc");
		shouldNotMatch("abc", "abcd");
		shouldNotMatch("b(a(c)*a)*", "bacac");
	}
}

## Regex challenge
Consider the subset of regular expressions that only use lowercase letters, parentheses, and the Kleene star *.
The presence of a Kleene star means that the preceding parenthesized phrase can appear zero or more times.
Parentheses are only valid around the text preceding a Kleene star. Write a program that takes such a regular
expression and a string and determines whether or not that entire string matches the regular expression.

For example, the string on the left matches the regular expression on the right:

    hello			hello
    testtest			(test)*
    helllllllo			he(l)*lo
    bp				b(oo)*p
    abcd			a(bc)*(bc)*d
    bananaba			(b(an)*a)*

Don’t use any existing regular expression utilities.

You may assume that the regex you’re given is syntactically valid.
	import java.util.ArrayList;
	import java.util.List;

	public class SimplifiedRegexMatcher {
	private SimplifiedRegexMatcher() {}

	public static boolean match(String regex, String needle) {
	// Our algorithm will fail if an open parenthesis is the first character, so add a character to the beginning of
	// both the regex and the needle.
	regex = '!' + regex;
	needle = '!' + needle;

	// Trailer characters
	regex = regex + '^';
	needle = needle + '^';

	// Keep track of a pointer to all possible current locations in the regex
	List<Integer> possibleRegexIndices = new ArrayList<>();

	// If the regex is empty then it can only match an empty string
	if(regex.equals("")) {
	return needle.equals("");
	}

	// First scan forward to the first letter in the regex
	int i = 0;
	while(regex.charAt(i) == '(') {
	i++;
	}

	// Start with the first letter in our possible states
	possibleRegexIndices.add(i);

	// Scan through the needle one character at a time
	for(int needle_index = 0; needle_index < needle.length() - 1; needle_index++) {
	// If we have no more options, then it must not have been a match.
	if(possibleRegexIndices.size() == 0) {
	return false;
	}

	char currentNeedle = needle.charAt(needle_index);

	List<Integer> nextPossibleRegexIndices = new ArrayList<>();
	for(Integer regex_index : possibleRegexIndices) {
	if(regex.charAt(regex_index) != currentNeedle) {
	continue;
	}

	// We know that we're pointing to a letter right now. What we do next depends on whether the next character
	// is a parenthesis or not.
	char nextRegexCharacter = regex.charAt(regex_index + 1);

	if(nextRegexCharacter == '(') {
	// Scan forwards to the next letter and add it
	int j = regex_index + 1;
	while(regex.charAt(j) == '(' \|\| regex.charAt(j) == '*' \|\| regex.charAt(j) == ')') {
	j++;
	}
	nextPossibleRegexIndices.add(j);

	// Scan forward to the matching end parenthesis
	j = regex_index + 2;
	int depth = 0;
	while(regex.charAt(j) != ')' \|\| depth > 0) {
	if(regex.charAt(j) == '(') {
	depth++;
	} else if(regex.charAt(j) == ')') {
	depth--;
	}
	j++;
	}

	// Scan forwards to the next letter and add it
	while(regex.charAt(j) == '(' \|\| regex.charAt(j) == '*' \|\| regex.charAt(j) == ')') {
	j++;
	}
	nextPossibleRegexIndices.add(j);


	} else if(nextRegexCharacter == ')') {
	// Scan forwards to the next letter and add it
	int j = regex_index + 1;
	while(regex.charAt(j) == '(' \|\| regex.charAt(j) == '*' \|\| regex.charAt(j) == ')') {
	j++;

	// If we go off the end of the regex then this was clearly not a valid possibility
	if(j == regex.length()) {
	continue;
	}
	}
	nextPossibleRegexIndices.add(j);

	// Scan backwards to the opening parenthesis
	j = regex_index;
	int depth = 0;
	while(regex.charAt(j) != '(' \|\| depth > 0) {
	if(regex.charAt(j) == ')') {
	depth++;
	} else if(regex.charAt(j) == '(') {
	depth--;
	}
	j--;
	}

	// Scan forwards to the next letter and add it
	while(regex.charAt(j) == '(' \|\| regex.charAt(j) == '*' \|\| regex.charAt(j) == ')') {
	j++;
	}
	nextPossibleRegexIndices.add(j);
	} else {
	// Just add the next letter
	nextPossibleRegexIndices.add(regex_index + 1);
	}
	}

	// Replace our current possible regex locations with the next pass
	possibleRegexIndices = nextPossibleRegexIndices;
	}

	// Return true iff one possible state is the terminating character
	return possibleRegexIndices.contains(regex.length() - 1);
	}

	private static void shouldMatch(String regex, String needle) {
	System.out.println("The regex " + regex + " should match the needle " + needle + " - " + (match(regex, needle) ? "OK" : "ERROR"));
	}

	private static void shouldNotMatch(String regex, String needle) {
	System.out.println("The regex " + regex + " should not match the needle " + needle + " - " + (!match(regex, needle) ? "OK" : "ERROR"));
	}


	public static void main(String[] args) {
	shouldMatch("abcd", "abcd");
	shouldMatch("(abc)*", "abc");
	shouldMatch("(abc)(abc)", "abcabc");
	shouldMatch("b(oo)*p", "bp");
	shouldMatch("(b(an)a)", "bananaba");
	shouldMatch("b(a(a))", "b");
	shouldMatch("b(a(a))", "ba");
	shouldMatch("b(a(a))", "baa");

	shouldNotMatch("abcd", "abc");
	shouldNotMatch("abc", "abcd");
	shouldNotMatch("b(a(c)a)", "bacac");
	}
	}

	import java.util.ArrayDeque;
	import java.util.ArrayList;
	import java.util.Deque;
	import java.util.List;

	// started 1:40
	// ended 2:04
	public class CheatingRegexMatcher {
	// Increase this to make failure cases rarer at the cost of performance.
	private static final int BRANCHING_FACTOR = 5;

	private static String replaceNTimes(String str, int startIdx, int endIdx, int n) {
	String firstPart = str.substring(0, startIdx);
	String endPart = str.substring(endIdx + 1, str.length());
	String multiplyPart = str.substring(startIdx + 1, endIdx - 1);

	StringBuilder sb = new StringBuilder(firstPart.length() + (endPart.length() * n) + multiplyPart.length());
	sb.append(firstPart);
	for(int i = 0; i < n; i++) {
	sb.append(multiplyPart);
	}
	sb.append(endPart);

	return sb.toString();
	}

	public static boolean match(String regex, String str) {
	if(regex.equals(str)) return true;
	if(!regex.contains("*")) return false;

	Deque<String> buffer = new ArrayDeque<>();
	buffer.add(regex);

	while(!buffer.isEmpty()) {
	String curString = buffer.remove();

	// Find the leftmost open parenthesis
	int j = 0;
	while(curString.charAt(j) != '(') {
	j++;
	}

	// Scan to the closing parenthesis
	int k = j + 1, depth = 0;
	while(curString.charAt(k) != ')' \|\| depth > 0) {
	if(curString.charAt(k) == '(') {
	depth++;
	} else if(curString.charAt(k) == ')') {
	depth--;
	}
	k++;
	}

	// Go forward one to the asterisk
	k++;

	// Replace (abc)* with abc, abcabc, abcabcabc, abcabcabcabc, etc.
	for(int i = 0; i < BRANCHING_FACTOR; i++) {
	String replaced = replaceNTimes(curString, j, k, i);
	if(replaced.equals(str)) {
	return true;
	}
	if(replaced.contains("*")) {
	buffer.add(replaced);
	}
	}
	}

	return false;
	}

	private static void shouldMatch(String regex, String needle) {
	System.out.println("The regex " + regex + " should match the needle " + needle + " - " + (match(regex, needle) ? "OK" : "ERROR"));
	}

	private static void shouldNotMatch(String regex, String needle) {
	System.out.println("The regex " + regex + " should not match the needle " + needle + " - " + (!match(regex, needle) ? "OK" : "ERROR"));
	}


	public static void main(String[] args) {
	shouldMatch("abcd", "abcd");
	shouldMatch("(abc)*", "abc");
	shouldMatch("(abc)(abc)", "abcabc");
	shouldMatch("b(oo)*p", "bp");
	shouldMatch("(b(an)a)", "bananaba");
	shouldMatch("b(a(a))", "b");
	shouldMatch("b(a(a))", "ba");
	shouldMatch("b(a(a))", "baa");

	// This is going to fail because it requires too much expansion. That's the downside of cheating.
	shouldMatch("(a)*", "aaaaaaaaaaa");

	shouldNotMatch("abcd", "abc");
	shouldNotMatch("abc", "abcd");
	shouldNotMatch("b(a(c)a)", "bacac");
	}
	}
	Consider the subset of regular expressions that only use lowercase letters, parentheses, and the Kleene star *.
	The presence of a Kleene star means that the preceding parenthesized phrase can appear zero or more times.
	Parentheses are only valid around the text preceding a Kleene star. Write a program that takes such a regular
	expression and a string and determines whether or not that entire string matches the regular expression.

	For example, the string on the left matches the regular expression on the right:

	hello hello
	testtest (test)*
	helllllllo he(l)*lo
	bp b(oo)*p
	abcd a(bc)(bc)d
	bananaba (b(an)a)

	Don’t use any existing regular expression utilities.

	You may assume that the regex you’re given is syntactically valid.