Last active
July 9, 2016 19:31
-
-
Save jonaslu/241e658b6ed428f83910ea728ad9c349 to your computer and use it in GitHub Desktop.
Python version of the regex-engine in Pike and Kerningham: Practice of programming.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def match(text, regex): | |
if not regex: | |
return True | |
if regex[0] == '^': | |
return matchFirst(text, regex[1:]) | |
else: | |
return matchInString(text, regex) | |
def matchInString(text, regex): | |
if not regex: | |
return True | |
elif regex[0] == '$' and len(regex) == 1 and not text: | |
return True | |
elif matchFirst(text, regex): | |
return True | |
elif not text: | |
return False | |
else: | |
return matchInString(text[1:], regex) | |
def matchFirst(text, regex): | |
if not regex: | |
return True | |
elif regex[0] == '$' and len(regex) == 1 and not text: | |
return True | |
elif len(regex) > 1 and regex[1] == '*': | |
return matchStar(text, regex) | |
elif not text: | |
return False | |
elif regex[0] == '.': | |
return matchFirst(text[1:], regex[1:]) | |
elif text[0] == regex[0]: | |
return matchFirst(text[1:], regex[1:]) | |
else: | |
return False | |
def matchStar(text, regex): | |
# By trying match directly against the next regex | |
# character makes this non-greedy | |
if matchFirst(text, regex[2:]): | |
return True | |
elif not text: | |
return False | |
elif matchFirst(text[0], regex[0]): | |
return matchStar(text[1:], regex) | |
else: | |
return False | |
def test(expression, expected): | |
if not expression == expected: | |
raise AssertionError("Test failed, expected {} but got {}".format(expected, expression)) | |
# Simple word matches | |
test(match("",""), True) | |
test(match("a",""), True) | |
test(match("", "a"), False) | |
test(match("a","a"), True) | |
test(match("a","aa"), False) | |
test(match("ab","b"), True) | |
test(match("ab","ab"), True) | |
test(match("aba","ba"), True) | |
# Start anchor | |
test(match("", "^"), True) | |
test(match("ab","^a"), True) | |
test(match("ab","^b"), False) | |
test(match("ab","^ab"), True) | |
# End anchor | |
test(match("", "$"), True) | |
test(match("ab","a$"), False) | |
test(match("ab","b$"), True) | |
test(match("ab","ab$"), True) | |
# All anchors | |
test(match("", "^$"), True) | |
test(match("ab","^a$"), False) | |
test(match("ab","^b$"), False) | |
test(match("a","^a$"), True) | |
test(match("ab","^ab$"), True) | |
# Dot | |
test(match("a","."), True) | |
test(match("ab","a."), True) | |
test(match("ab",".b"), True) | |
test(match("ab",".."), True) | |
test(match("ab","..."), False) | |
# Star | |
test(match("", ".*"), True) | |
test(match("aaa", "a*"), True) | |
test(match("aaa", ".*"), True) | |
test(match("aaa", "b*"), True) | |
test(match("aaa", "bb*"), False) | |
# For my sanity | |
test(match("Aye, tis the season", "^Ay.*se"), True) | |
test(match("Aye, tis the season", "^Ay.*sr"), False) | |
test(match("Aye, tis the season", "^Ay.*sr$"), False) | |
test(match("Aye, tis the season", "^Ay.*on$"), True) | |
test(match("Aye, tis the season", "Ay.*on"), True) | |
test(match("Aye, tis the season", "tis"), True) | |
test(match("Aye, tis the season", "tis.*ea"), True) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment