Skip to content

Instantly share code, notes, and snippets.

@mayhewsw
Created May 6, 2016 18:30
Show Gist options
  • Save mayhewsw/957661c5d52824af357bf5789dea8365 to your computer and use it in GitHub Desktop.
Save mayhewsw/957661c5d52824af357bf5789dea8365 to your computer and use it in GitHub Desktop.
Edit Distance
import numpy as np
import math
import sys
np.set_printoptions(linewidth=400)
def EditDistance(s, t):
# For all i and j, d[i,j] will hold the Levenshtein distance between
# the first i characters of s and the first j characters of t.
# Note that d has (m+1) x(n+1) values.
# let d be a 2-d array of int with dimensions [0..m, 0..n]
m = len(s)
n = len(t)
d = np.zeros((m+1, n+1))
for i in range(m+1):
# the distance of any first string to an empty second string
d[i][0] = 2*i
for j in range(n+1):
# the distance of any second string to an empty first string
d[0][j] = 2*j
for j in range(1, n+1):
for i in range(1, m+1):
if s[i-1] == t[j-1]:
# no operation required
d[i, j] = d[i-1, j-1]
else:
d[i, j] = min(d[i-1, j] + 2, d[i-1, j-1] + 3)
print "Final matrix:"
print d
# backtrace.
back = ""
i = m
j = n
ls = list(s)
lt = list(t)
while True:
if j == 0:
# print "deletion up"
i = i - 1
del ls[i]
elif i == 0:
# print "deletion over"
j = j - 1
del lt[j]
elif d[i-1, j] < d[i-1, j-1]+1:
# print "we did a deletion"
i = i-1
del ls[i]
else: # d[i-1, j-1] < d[i-1, j]:
# if d[i-1,j-1] == d[i][j]:
# print "copied"
# else:
# print "we did a substitution"
i = i-1
j = j-1
lt[j] = ls[i]
if i <= 0 and j <= 0:
break
print "Final strings:"
print "".join(ls)
print "".join(lt)
print "Final cost:"
return d[m,n]
if len(sys.argv) < 3:
print "Usage: python editdist.py <string1> <string2>"
else:
print EditDistance(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment