Skip to content

Instantly share code, notes, and snippets.

@mokhdzanifaeq
Last active June 12, 2017 07:46
Show Gist options
  • Save mokhdzanifaeq/25088117c2933b3e8ee9a3bc53d3a24d to your computer and use it in GitHub Desktop.
Save mokhdzanifaeq/25088117c2933b3e8ee9a3bc53d3a24d to your computer and use it in GitHub Desktop.
satisfying regular expression using smt solver
import re
import sys
import string
from z3 import *
# hackish way to generate string that satisfy simple regex
# what's not supported: group reference & look behind
# usage: python regex.py PATTERN LENGTH
pattern = sys.argv[1]
length = int(sys.argv[2])
solver = Solver()
chars = [BitVec("%i" % i, 32) for i in xrange(length)]
categories = {
'category_digit': list(string.digits),
'category_not_digit': list(string.ascii_letters + string.punctuation),
'category_space': list(string.whitespace),
'category_not_space': list(string.printable.strip()),
'category_word': list(string.ascii_letters + string.digits + '_'),
'category_not_word': set(string.printable) - set(string.ascii_letters + string.digits + '_')
}
# solve recursively
def solve(regex, pos, fill = 1, step = 1):
if pos == length and regex:
if regex[0][1] != 'at_end':
raise Exception('Specified length is too short')
if not regex:
if pos != length and fill == 1:
return [chars[i] == 32 for i in xrange(pos, length)]
else:
return []
key, value = regex[0]
if key == 'literal':
expr = [chars[pos] == value]
elif key == 'not_literal':
expr = [chars[pos] != value]
elif key == 'in':
clause = solve(value, pos, 0, 0)
expr = [Not(Or(clause))] if value[0][0] == 'negate' else [Or(clause)]
elif key == 'any':
expr = [And(chars[pos] >= 33, chars[pos] <= 126)]
elif key == 'range':
expr = [And(chars[pos] >= value[0], chars[pos] <= value[1])]
elif key == 'category':
expr = [chars[pos] == ord(x) for x in categories[value]]
elif key == 'branch':
clause = []
for branch in value[1]:
inner_clause = solve(branch, pos, 0)
inner_clause += solve(regex[1:], pos + len(branch))
clause.append(And(inner_clause))
return [Or(clause)]
elif key == 'subpattern':
regex = regex[1:]
clause = list(value[1])
while regex:
clause.append(regex[0])
regex = regex[1:]
expr = solve(clause, pos)
pos = length
elif key == 'assert':
expr = solve(value[1], pos)
pos -= 1
fill = 0
elif key == 'assert_not':
expr = [Not(And(solve(value[1], pos)))]
pos -= 1
fill = 0
elif key == 'groupref':
# TODO
pass
elif key == 'max_repeat' or key == 'min_repeat':
clause = []
iteration = min(length, value[1]) + 1
for i in xrange(value[0], iteration):
tmp = list(value[2]) * i + list(regex[1:])
try: clause.append(And(solve(tmp, pos)))
except Exception: break
return [Or(clause)]
elif key == 'negate' or key == 'at':
expr = []
pos -= step
return expr + solve(regex[1:], pos + step, fill, step)
# all must be printable
for i in xrange(length):
solver.add(chars[i] >= 32, chars[i] <= 126)
regex = re.sre_parse.parse(pattern)
solver.add(solve(regex, 0))
print
if str(solver.check()) == 'sat':
model = solver.model()
answer = ['' for i in xrange(len(model))]
for d in model.decls():
answer[int(str(d.name()))]= chr(model[d].as_long())
print ''.join(answer)
else:
print 'unsat'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment