Skip to content

Instantly share code, notes, and snippets.

@suxue
Created March 19, 2014 09:09
Show Gist options
  • Save suxue/9638109 to your computer and use it in GitHub Desktop.
Save suxue/9638109 to your computer and use it in GitHub Desktop.
Calculate Edit Distance
#!/usr/bin/env python
# vim: set fileencoding=utf-8:
import sys
import numpy
import random
class Direction:
def __str__(self):
if self._dir == 0:
return '-'
elif self._dir == 1:
return '|'
elif self._dir == 2:
return '\\'
elif self._dir == 3:
return 'm'
else:
1/0
def __repr__(self):
return self.__str__()
def data(self):
return self._self
class Horizontal(Direction):
value = 0
def __init__(self):
self._dir = Horizontal.value
class Vertical(Direction):
value = 1
def __init__(self):
self._dir = Vertical.value
class Diagonal(Direction):
value = 2
def __init__(self):
self._dir = Diagonal.value
class Match(Direction):
value = 3
def __init__(self):
self._dir = Match.value
Directions = [Horizontal(), Vertical(), Diagonal(), Match()]
def default_printer(v):
return v.__str__()
def print_matrix(m, printer=default_printer):
for row in m:
for col in row:
sys.stdout.write("%3s" % printer(col))
sys.stdout.write("\n")
def initialize_distance_matrix(src_len, dest_len):
matrix = numpy.zeros([src_len + 1, dest_len + 1], dtype=numpy.int16)
for no, row in enumerate(matrix):
row[0] = no
for x, _ in enumerate(matrix[0]):
matrix[0][x] = x
return matrix
def initialize_action_matrix(src_len, dest_len):
actions = numpy.zeros([src_len + 1, dest_len + 1], dtype=numpy.int8)
for _, row in enumerate(actions):
row[0] = Vertical.value
for x, _ in enumerate(actions[0]):
actions[0][x] = Horizontal.value
actions[0][0] = -1 # no predecessor
return actions
def compute_edit_distance(src, dest):
matrix = initialize_distance_matrix(len(src), len(dest))
actions = initialize_action_matrix(len(src), len(dest))
def get_horizontal_predecessor(y, x):
return Horizontal.value, matrix[y][x-1] + 1
def get_vertical_predecessor(y, x):
return Vertical.value, matrix[y-1][x] + 1
def get_diagonal_predecessor(y, x):
src_char = src[y-1]
dest_char = dest[x-1]
if src_char == dest_char:
return Match.value, matrix[y-1][x-1],
else:
return Diagonal.value, matrix[y-1][x-1] + 2
# fill the tables
for y, row in enumerate(matrix[1:]):
y += 1
for x, col in enumerate(row[1:]):
x += 1
results = [get_horizontal_predecessor(y, x),
get_vertical_predecessor(y, x),
get_diagonal_predecessor(y, x)]
values = map(lambda v: v[1], results)
minval = min(values)
# deterministic way
# index = values.index(minval)
# randomly
indexes = []
for i, v in enumerate(values):
if v == minval:
indexes.append(i)
index = random.choice(indexes)
direction = results[index][0]
matrix[y][x] = minval
actions[y][x] = direction
print "\nedit distance is", matrix[-1][-1], '\n'
print 'DISTANCE MATRIX'
print_matrix(matrix)
print 'MOVEMENT MATRIX'
print_matrix(actions,
printer = lambda v: Directions[v] if v >= 0 else 'o')
print ''
# visualization
class Number:
def __init__(self, value):
self.value = value
def increase(self):
self.value += 1
def decrease(self):
self.value -= 1
y = Number(len(src))
x = Number(len(dest))
action_list = []
def move_up():
# delete from src
y.decrease()
char = src[y.value]
action_list.append(('Delete',))
def move_left():
# append to dest
x.decrease()
char = dest[x.value]
action_list.append(('Append', char,))
pass
def move_diagonal():
x.decrease()
y.decrease()
inchar = dest[x.value]
outchar = src[y.value]
action_list.append(('Change', outchar, inchar,))
def just_match():
x.decrease()
y.decrease()
action_list.append(('Match',))
movements = [move_left, move_up, move_diagonal, just_match]
while not (x.value == 0 and y.value == 0):
action = actions[y.value][x.value]
movements[action]()
action_list.reverse()
string = list(src)
char = ''
index = 0
red = '\033[31;1m'
green = '\033[32;1m'
yellow = '\033[33;1m'
gray = '\033[38;5;241m'
reset = '\033[0;m'
print "VISUALIZE EDITING SEQUENCE"
while len(action_list) > 0:
action = action_list.pop(0)
if (action[0] == 'Delete'):
before = ''.join(string[:index])
after = ''.join(string[index+1:])
char = string.pop(index)
print 'Delete ', before + red + char + gray + after + reset
elif (action[0] == 'Append'):
before = ''.join(string[:index])
after = ''.join(string[index:])
char = action[1]
string.insert(index, char)
index = index + 1
print 'Append ', before + green + char + gray + after + reset
elif (action[0] == 'Change'):
before = ''.join(string[:index])
after = ''.join(string[index+1:])
oldchar = string.pop(index)
newchar = action[2]
string.insert(index, newchar)
index = index + 1
print 'Change ', before + red + oldchar + green + newchar + gray + after + reset
elif (action[0] == 'Match'):
before = ''.join(string[:index])
after = ''.join(string[index+1:])
char = string[index]
index = index + 1
print 'Match ', before + yellow + char + gray + after + reset
print ''
def main(argv):
if len(argv) != 2:
sys.stderr.write("Usage: ed.py src dest\n")
exit(1)
else:
compute_edit_distance(argv[0], argv[1])
main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment