Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@jonaslu
Last active July 9, 2016 19:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonaslu/241e658b6ed428f83910ea728ad9c349 to your computer and use it in GitHub Desktop.
Save jonaslu/241e658b6ed428f83910ea728ad9c349 to your computer and use it in GitHub Desktop.
Python version of the regex-engine in Pike and Kerningham: Practice of programming.
def match(text, regex):
if not regex:
return True
if regex[0] == '^':
return matchFirst(text, regex[1:])
else:
return matchInString(text, regex)
def matchInString(text, regex):
if not regex:
return True
elif regex[0] == '$' and len(regex) == 1 and not text:
return True
elif matchFirst(text, regex):
return True
elif not text:
return False
else:
return matchInString(text[1:], regex)
def matchFirst(text, regex):
if not regex:
return True
elif regex[0] == '$' and len(regex) == 1 and not text:
return True
elif len(regex) > 1 and regex[1] == '*':
return matchStar(text, regex)
elif not text:
return False
elif regex[0] == '.':
return matchFirst(text[1:], regex[1:])
elif text[0] == regex[0]:
return matchFirst(text[1:], regex[1:])
else:
return False
def matchStar(text, regex):
# By trying match directly against the next regex
# character makes this non-greedy
if matchFirst(text, regex[2:]):
return True
elif not text:
return False
elif matchFirst(text[0], regex[0]):
return matchStar(text[1:], regex)
else:
return False
def test(expression, expected):
if not expression == expected:
raise AssertionError("Test failed, expected {} but got {}".format(expected, expression))
# Simple word matches
test(match("",""), True)
test(match("a",""), True)
test(match("", "a"), False)
test(match("a","a"), True)
test(match("a","aa"), False)
test(match("ab","b"), True)
test(match("ab","ab"), True)
test(match("aba","ba"), True)
# Start anchor
test(match("", "^"), True)
test(match("ab","^a"), True)
test(match("ab","^b"), False)
test(match("ab","^ab"), True)
# End anchor
test(match("", "$"), True)
test(match("ab","a$"), False)
test(match("ab","b$"), True)
test(match("ab","ab$"), True)
# All anchors
test(match("", "^$"), True)
test(match("ab","^a$"), False)
test(match("ab","^b$"), False)
test(match("a","^a$"), True)
test(match("ab","^ab$"), True)
# Dot
test(match("a","."), True)
test(match("ab","a."), True)
test(match("ab",".b"), True)
test(match("ab",".."), True)
test(match("ab","..."), False)
# Star
test(match("", ".*"), True)
test(match("aaa", "a*"), True)
test(match("aaa", ".*"), True)
test(match("aaa", "b*"), True)
test(match("aaa", "bb*"), False)
# For my sanity
test(match("Aye, tis the season", "^Ay.*se"), True)
test(match("Aye, tis the season", "^Ay.*sr"), False)
test(match("Aye, tis the season", "^Ay.*sr$"), False)
test(match("Aye, tis the season", "^Ay.*on$"), True)
test(match("Aye, tis the season", "Ay.*on"), True)
test(match("Aye, tis the season", "tis"), True)
test(match("Aye, tis the season", "tis.*ea"), True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment