Created
August 2, 2011 11:53
-
-
Save posativ/1120054 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def smithwaterman(str1='-TSASAKARA', str2='-TASASRAR'): | |
def trace(i,j, u): | |
if u == 0: | |
return '' | |
else: | |
if matrix[i-1][j-1] + similarity(str1[i], str2[j]) == u: | |
return trace(i-1,j-1, matrix[i-1][j-1]) + str1[i] + str2[j] | |
if matrix[i-1][j] + similarity(str1[i], '-') == u: | |
return trace(i-1, j, matrix[i-1][j]) + str1[i] + '-' | |
if matrix[i][j-1] + similarity('-', str2[j]) == u: | |
return trace(i,j-1, matrix[i][j-1]) + '-' + str2[j] | |
if (str1 and str1[0] != '-') or (str2 and str2[0] != '-'): | |
raise ValueError('You have to add an epsilon in front of your strings!') | |
m = len(str1) | |
n = len(str2) | |
matrix = [ [None for j in range(n)] for i in range(m)] | |
# erstes: zeile, zweites: spalte | |
# initialisierung | |
for i in range(m): | |
matrix[i][0] = 0 | |
for j in range(n): | |
matrix[0][j] = 0 | |
for i in range(1, m): | |
for j in range(1, n): | |
u = matrix[i-1][j-1] + similarity(str1[i], str2[j]) | |
v = matrix[i-1][j] + similarity(str1[i], '-') | |
w = matrix[i][j-1] + similarity('-', str2[j]) | |
matrix[i][j] = max(0, u, v, w) | |
print 'Matrix\n' | |
print ' ', ' '.join(c for c in str2) | |
for i in range(m): | |
print str1[i], ' '.join(str(x).rjust(2) for x in matrix[i]) | |
maximum = max(sum(matrix, [])) #flatten matrix, via <http://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python> | |
print maximum, u'ist der höchste Wert der Matrix.' | |
# traceback, known issue: gibt nur ein einziges Alignment aus | |
l = {} | |
for i in range(m, 0, -1): | |
for j in range(n, 0, -1): | |
if matrix[i-1][j-1] == maximum: | |
l[maximum] = (i-1,j-1) | |
l[18] = (9, 7) | |
print u'Ein lokales Alignment (von %s) mit der Ähnlichkeit von %s ist:\n' % (len(l), maximum) | |
for u, alignment in l.iteritems(): | |
i,j = alignment | |
result = trace(i,j, u) | |
print u, alignment | |
print result[::2] | |
print result[1::2] | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment