Skip to content

Instantly share code, notes, and snippets.

@funderburkjim
Created April 17, 2015 02:28
Show Gist options
  • Save funderburkjim/c438d0089a15676a1963 to your computer and use it in GitHub Desktop.
Save funderburkjim/c438d0089a15676a1963 to your computer and use it in GitHub Desktop.
Edit distance module used by fuzzyalpha.py
"""Levenshtein distance between 2 strings.
Source: http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python, first version
Jan 5, 2015. This file copied from TM2013/vcpte/ejf/vac-vcp-cmp1/ folder.
"""
def levenshtein1(s1, s2,m):
# returns levenshtein distance, but returns m if
# the distance would be greater than or equal to m.
# this is done for efficiency in tests such as
# if levenshtein1(s1, s2,m) < m:
# returns -1 if distance is > m
dbg=False
if dbg:
print "s1=%s (%s), and s2=%s (%s)" % (s1,len(s1),s2,len(s2))
if len(s1) < len(s2):
return levenshtein1(s2, s1,m)
# len(s1) >= len(s2)
if len(s2) == 0:
return len(s1)
if (len(s1) - len(s2))> m: # first optimization. Corrected Jan 5, 2015
return -1
previous_row = xrange(len(s2) + 1)
for i, c1 in enumerate(s1):
current_row = [i + 1]
for j, c2 in enumerate(s2):
insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer
deletions = current_row[j] + 1 # than s2
substitutions = previous_row[j] + (c1 != c2)
current_row.append(min(insertions, deletions, substitutions))
previous_row = current_row
# experience shows the following is not an optimization, but changes
# the result. It must be that the last value in previous_row can
# decrease as well as increase! (This needs further examination)
#if previous_row[-1] >= m: # second optimization
# return m
ans = previous_row[-1]
if ans > m:
ans = -1
return ans
def levenshtein(s1, s2):
if len(s1) < len(s2):
return levenshtein(s2, s1)
# len(s1) >= len(s2)
if len(s2) == 0:
return len(s1)
previous_row = xrange(len(s2) + 1)
for i, c1 in enumerate(s1):
current_row = [i + 1]
for j, c2 in enumerate(s2):
insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer
deletions = current_row[j] + 1 # than s2
substitutions = previous_row[j] + (c1 != c2)
current_row.append(min(insertions, deletions, substitutions))
previous_row = current_row
return previous_row[-1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment