Skip to content

Instantly share code, notes, and snippets.

@claymcleod
Last active August 29, 2015 14:08
Show Gist options
  • Save claymcleod/f9ce092cfed501408ee6 to your computer and use it in GitHub Desktop.
Save claymcleod/f9ce092cfed501408ee6 to your computer and use it in GitHub Desktop.
# Title: Edit Distance
# Authors: Clay McLeod
# Description: Python edit distance problem
# Section: Python
# Subsection: Interesting Problems
import sys
from tabulate import tabulate
def edit_distance(x, y):
"""---------------------------------------------------------------------
> Documentation
Author: Clay McLeod
Course: CSCI 533
Date: November 8, 2014
Assignment: 2
Method: edit_distance(x, y)
Description: Finds the shortest possible path between two strings using
dynamic programming techniques.
Input: 1) the starting word
2) the finishing word
Output: 1) the solution matrix of the dynamic programming technique
2) the corresponding operation matrix
NOTE: You will need to install the tabulate library for python to format
the output correctly. Tabulate is a library that will take a
matrix and format is correctly to Standard Out. You can install
tabulate by one of the following commands in your shell:
1) pip install tabulate
2) check the documentation at https://pypi.python.org/pypi/tabulate
Credit:
1) http://en.m.wikipedia.org/wiki/Levenshtein_distance helped clarify
how a dynamic solution looks for this problem. However,
all the code for this project was written by myself.
---------------------------------------------------------------------
"""
# preferences
COPY_COST = 0
TWIDDLE_COST = 1
REPLACE_COST = 1
INSERT_COST = 2
DELETE_COST = 2
solution_matrix = [[0 for i in range(0, len(y)+1)] for i in range(0, len(x)+1)]
operation_matrix = [['-' for i in range(0, len(y)+1)] for i in range(0, len(x)+1)]
for i in range(1, len(x)+1):
solution_matrix[i][0] = i
for j in range(1, len(y)+1):
solution_matrix[0][j] = j
# find length of input string
m = len(x)
n = len(y)
twiddles = []
# loop
for j in range(1, n+1):
for i in range(1, m+1):
if (i, j) in twiddles:
solution_matrix[i][j] = solution_matrix[i-1][j-1]
operation_matrix[i][j] = 'T'
twiddles.remove((i, j))
continue
elif x[i-1] == y[j-1]:
solution_matrix[i][j] = solution_matrix[i-1][j-1] + COPY_COST
else:
twiddle = solution_matrix[i-1][j-1] if i < m and j < n and x[i-1] == y[j] and x[i] == y[j-1] else 9999;
solution_matrix[i][j] = min([
solution_matrix[i-1][j-1] + REPLACE_COST,
twiddle + TWIDDLE_COST,
solution_matrix[i-1][j] + DELETE_COST,
solution_matrix[i][j-1] + INSERT_COST,
])
if twiddle != 9999:
twiddles.append((i+1, j+1))
# populate operation matrix
cost = solution_matrix[i][j]
if cost == solution_matrix[i-1][j-1]:
operation_matrix[i][j] = 'C'
elif cost == twiddle + TWIDDLE_COST:
operation_matrix[i][j] = 'T'
elif cost == solution_matrix[i-1][j-1] + REPLACE_COST:
operation_matrix[i][j] = 'R'
elif cost == solution_matrix[i-1][j-1] + DELETE_COST:
operation_matrix[i][j] = 'D'
elif cost == solution_matrix[i-1][j-1] + INSERT_COST:
operation_matrix[i][j] = 'I'
return solution_matrix, operation_matrix
def move_to_results_array(move):
if move == "T":
return "Twiddle"
elif move == "C":
return "Copy"
elif move == "R":
return "Replace"
elif move == "I":
return "Insert"
elif move == "D":
return "Delete"
else:
raise RuntimeError("Unknown Input!")
# boilerplate
if __name__ == "__main__":
# print documentation
print edit_distance.__doc__
# Input
original = list(str(raw_input('Enter is the original sequence: ')))
ending = list(str(raw_input('Enter is the ending sequence: ')))
# Calculation
solution_matrix, operation_matrix = edit_distance(original, ending)
print ''
# Output
print 'Solution Matrix:'
print tabulate(solution_matrix, tablefmt='plain')
print ''
print 'Operation Matrix:'
print tabulate(operation_matrix, tablefmt='plain')
# Messy solution for getting the order of operations, etc.
# Since this is not the real substance of the assignment, I
# wans't concerned about making it pretty.
i = 1
j = 1
results = []
results.append(move_to_results_array(operation_matrix[i][j]))
while i < len(original) and j < len(ending):
next_i = i + 1
next_j = j + 1
best_move = 9999;
if best_move > operation_matrix[i][j+1]:
next_i = i
next_j = j
best_move = operation_matrix[i][j+1]
next_j = j + 1
if best_move > operation_matrix[i+1][j]:
next_i = i
next_j = j
best_move =operation_matrix[i+1][j]
next_i = i + 1
if best_move > operation_matrix[i+1][j+1]:
next_i = i
next_j = j
best_move =operation_matrix[i+1][j+1]
i = i + 1
j = j + 1
i = next_i
j = next_j
results.append(move_to_results_array(operation_matrix[i][j]))
i = 0
print ''
print 'Solution steps:'
while i < len(results):
if results[i] == "Twiddle":
print '%d. %s | %s' % (i+1, results[i], "".join(ending[:i+2]))
del results[i]
else:
print '%d. %s | %s' % (i+1, results[i], "".join(ending[:i+1]))
i = i + 1
print '%d. Kill | %s' % (i+1, "".join(ending))
print ''
print 'Edit distance = %d' % solution_matrix[len(original)][len(ending)]
---------------------------------------------------------------------
> Documentation
Author: Clay McLeod
Course: CSCI 533
Date: November 8, 2014
Assignment: 2
Method: edit_distance(x, y)
Description: Finds the shortest possible path between two strings using
dynamic programming techniques.
Input: 1) the starting word
2) the finishing word
Output: 1) the solution matrix of the dynamic programming technique
2) the corresponding operation matrix
NOTE: You will need to install the tabulate library for python to format
the output correctly. Tabulate is a library that will take a
matrix and format is correctly to Standard Out. You can install
tabulate by one of the following commands in your shell:
1) pip install tabulate
2) check the documentation at https://pypi.python.org/pypi/tabulate
---------------------------------------------------------------------
Enter is the original sequence: hello
Enter is the ending sequence: helol
Solution Matrix:
0 1 2 3 4 5
1 0 2 3 4 5
2 2 0 2 4 5
3 3 2 0 2 4
4 4 4 2 1 2
5 5 5 4 2 1
Operation Matrix:
- - - - - -
- C R R R R
- R C C R R
- R C C C C
- R R C T C
- R R C C T
Solution steps:
1. Copy | h
2. Copy | he
3. Copy | hel
4. Twiddle | helol
5. Kill | helol
Edit distance = 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment