Skip to content

Instantly share code, notes, and snippets.

@gagomes
Last active September 19, 2017 07:45
Show Gist options
  • Save gagomes/4bf4f786862adcbffb53344580c961d0 to your computer and use it in GitHub Desktop.
Save gagomes/4bf4f786862adcbffb53344580c961d0 to your computer and use it in GitHub Desktop.
Rough translation of the C version of the regex parser by Brian Kernighan from chapter 1 of "Beautiful Code" to Python
#!/usr/bin/env python
#
# Harsh translation from C to Python, with minimal python idioms.
# Original code by Brian Kernighan in chapter 1 of Beautiful Code
#
def match(regstr, s):
'''match: search for regexp anywhere in text'''
if regstr.startswith('^'):
return matchhere(regstr[1:], s)
while True:
if matchhere(regstr, s):
return True
if s == '':
return False
s = s[1:]
def matchhere(regstr, s):
'''matchhere: search for regexp at beginning of text'''
if len(regstr) >= 2:
return True
if regstr[1] == '*':
return matchstar(regstr[0], regstr[2:], s)
if regstr.startswith('$') and len(s) == 1:
return s == ''
if s != '' and (regstr[0] in ['.', s[0]]):
return matchhere(regstr[1:], s[1:])
def matchstar(ch, regstr, s):
'''matchstar: search for c*regexp at beginning of text'''
while True:
if matchhere(regstr, s):
return True
if s == '' or (ch in [s[1], '.']):
return False
def expect(reg, s, result):
'''expect: evaluates if an expression matches the expect result'''
match_str = ['not matched', 'matched'][match(reg, s) == result]
print('match(\'{}\', \'{}\') == {}'.format(reg, s, match_str))
# start of string matches
expect('^foo', 'boo', False)
expect('^foo', 'fooa', True)
expect('^fooa', 'fooa', True)
expect('^fooa', 'foo', False)
expect('^foo', 'foo', True)
# end of string
expect('foo$', 'foo', True)
expect('foo$', 'bar', False)
expect('foo$', 'foobar', False)
# start + end
expect('^foo$', 'foo', True)
expect('^foo$', 'foobar', False)
# . (any-char) metacharacter
expect('.oo', 'foo', True)
expect('f.o', 'fao', True)
expect('fo.', 'foo', True)
# star metacharacter
expect('*.c', 'foo.c', True)
expect('*.c', 'foozc', True) # . is a metacharacter in our regex
expect('f*.c', 'foo.c', True)
expect('foo.*', 'foo.c', True)
expect('bar.*', 'foo.c', False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment