Skip to content

Instantly share code, notes, and snippets.

# lopespm/levenshtein_regex_dp.py Last active May 1, 2019

Levenshtein distance between regex expression and target string - Dynamic Programming (Python)
 # (Variant #4 for exercise 16.2 on EPI (Elements of Programming Interviews)) (September 2018 edition) # The core idea is calculate the levenshtein distance, while taking into account the special cases of the regex expression # *, +, ? and . were taken into account for the regex expression. Expression blocks are not supported # This algorithm uses dynamic programming, yielding a O(mn) time complexity, O(m) auxiliary space for cache # (m and n are the lengths of regex and target strings respectively) # # Version using recursion with memoization: https://gist.github.com/lopespm/53a215d0b2b0518b52b6bb6687bdaff6 def regex_dist(regex: str, target: str): current = [None] * (len(regex) + 1) for t_i in range(len(target) + 1): prev = [item for item in current] for r_i in range(len(regex) + 1): if (r_i == 0 and t_i == 0): current[r_i] = 0 elif (r_i == 0): current[r_i] = t_i elif (t_i == 0): i, counter = r_i - 1, 0 while i >= 0: char = regex[i] i -= 2 if (char == '?' or char == '*' or char == '+') else 1 counter += 1 if (not (char == '?' or char == '*')) else 0 current[r_i] = counter # Regex special cases elif (regex[r_i-1] == '.'): current[r_i] = prev[r_i - 1] elif (regex[r_i-1] == '+' or regex[r_i-1] == '*' or regex[r_i-1] == '?'): if (regex[r_i-1-1] == target[t_i-1]): if (regex[r_i-1] == '?'): current[r_i] = prev[r_i - 2] else: current[r_i] = min(prev[r_i - 2], prev[r_i]) else: additional_cost = 1 if (regex[r_i-1] == '+') else 0 current[r_i] = min(prev[r_i - 2] + 1, prev[r_i] + 1, current[r_i - 2] + additional_cost) # Other characters elif (regex[r_i-1] == target[t_i-1]): current[r_i] = prev[r_i - 1] else: current[r_i] = min(prev[r_i - 1] + 1, prev[r_i] + 1, current[r_i - 1] + 1) return current[-1] # Some examples print(regex_dist("OrchestraaaQA+a", "CarthorseQAAAA")) print(regex_dist("A+b", "AAb")) print(regex_dist("A+b", "AAAAAb")) print(regex_dist("A+b", "AAAAAb03b")) print(regex_dist("A..b", "AAAAAb03b")) print(regex_dist("q+", "A")) print(regex_dist("q+a?a+", "A")) print(regex_dist("q+a?a+A+", "A")) print(regex_dist("q+a?a+A+.", "A")) print(regex_dist("q+A", "AAAAAb03b"))
to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.