Skip to content

Instantly share code, notes, and snippets.

@dloscutoff
Created June 21, 2021 19:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dloscutoff/0b60eac91398fc19c7ca91f1b409d490 to your computer and use it in GitHub Desktop.
Save dloscutoff/0b60eac91398fc19c7ca91f1b409d490 to your computer and use it in GitHub Desktop.
Given a simplified regex, output all (possibly infinitely many) strings that it fully matches.
import string
alphanumerics = string.ascii_letters + string.digits
inf = float("inf")
def parse(regex):
segments = []
regex += " "
for i, char in enumerate(regex):
if char in alphanumerics:
repetition_operator = regex[i+1] if regex[i+1] not in alphanumerics else " "
lower_bound = 0 if repetition_operator in "*?" else 1
upper_bound = inf if repetition_operator in "*+" else 1
segments.append((char, lower_bound, upper_bound))
else:
continue
return segments
def generate(regex_segments, length):
char, lower, upper = regex_segments[0]
if len(regex_segments) == 1:
if lower <= length <= upper:
yield char * length
else:
for first_count in range(length + 1):
if lower <= first_count <= upper:
for remainder in generate(regex_segments[1:], length - first_count):
yield char * first_count + remainder
def generate_all(regex_segments):
length = 0
while True:
for result in generate(regex_segments, length):
yield result
length += 1
if __name__ == "__main__":
test = parse("ab?c+d*")
print(test)
for match in generate_all(test):
print(match)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment