Created
March 12, 2013 13:17
-
-
Save elais/5142798 to your computer and use it in GitHub Desktop.
An implementation of the smith waterman algorithm using google app engine and python. Mainpage code and webapp settings not included.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SmithWaterman(webapp.RequestHandler): | |
def post(self): | |
try: | |
gap = int(self.request.get('Gap')) # gap penalty... or bonus if you'd prefer. | |
matchScores = False | |
try: | |
match = int(self.request.get('Match')) | |
mismatch = int(self.request.get('Mismatch')) | |
matchScores = True | |
except: | |
pass | |
seq1 = str(self.request.get('Sequence1')) # first input sequence | |
seq2 = str(self.request.get('Sequence2')) # second input sequence | |
seq1 = ' ' + ''.join(seq1.split()) | |
seq2 = ' ' + ''.join(seq2.split()) | |
seq1 = list(seq1) | |
seq2 = list(seq2) | |
M = [[0 for i in seq2] for j in seq1] # An initial 2d array filled with zeros | |
I = range(len(seq1)) # this is pythonic way to iterate through the sequences, trust me. | |
J = range(len(seq2)) # | |
# Similarity scores based off of wikipedia example. | |
if matchScores == True: | |
similarityMatrix = \ | |
{'A': {'A': match, 'G':mismatch, 'C':mismatch, 'T':mismatch}, | |
'G': {'A': mismatch, 'G':match, 'C':mismatch, 'T':mismatch}, | |
'C': {'A': mismatch, 'G':mismatch, 'C':match, 'T':mismatch}, | |
'T': {'A': mismatch, 'G':mismatch, 'C':mismatch, 'T':match}} | |
else: | |
similarityMatrix = \ | |
{'A': {'A': 10, 'G': -1, 'C': -3, 'T': -4}, | |
'G': {'A': -1, 'G': 7, 'C': -5, 'T': -3}, | |
'C': {'A': -3, 'G': -5, 'C': 9, 'T': 0}, | |
'T': {'A': -4, 'G': -3, 'C': 0, 'T': 8}} | |
#Initialization of 2d array | |
for i in I: | |
M[i][0] = gap * i | |
for j in J: | |
M[0][j] = gap * j | |
#Scoring | |
for i in I[1:]: | |
for j in J[1:]: | |
Match = M[i-1][j-1] + similarityMatrix[seq1[i]][seq2[j]] | |
Delete = M[i-1][j] + gap | |
Insert = M[i][j-1] + gap | |
M[i][j] = max(Match, Insert, Delete, 0) | |
heap = [] | |
for i in I[1:]: | |
heap = heap + M[i] | |
topThree = heapq.nlargest(3, heap) | |
# Traceback | |
Alignments = [] | |
for k in topThree: | |
AlignmentA = "" | |
AlignmentB = "" | |
for m in range(len(seq1)): | |
for n in range(len(seq1)): | |
if M[m][n] == k: | |
i = m | |
j = n | |
while ( i > 0 and j > 0 ): | |
Score = M[i][j] | |
if Score == 0: | |
break | |
else: | |
ScoreDiag = M[i - 1][j - 1] | |
ScoreUp = M[i][j-1] | |
ScoreLeft = M[i-1][j] | |
if (Score == ScoreDiag + similarityMatrix[seq1[i]][seq2[j]]): | |
AlignmentA = seq1[i] + AlignmentA | |
AlignmentB = seq2[j] + AlignmentB | |
i -= 1 | |
j -= 1 | |
elif (Score == ScoreLeft + gap): | |
AlignmentA = seq1[i] + AlignmentA | |
AlignmentB = "-" + AlignmentB | |
i -= 1 | |
elif (Score == ScoreUp + gap): | |
AlignmentA = "-" + AlignmentA | |
AlignmentB = seq2[j] + AlignmentB | |
j -= 1 | |
else: | |
print("algorithm error?") | |
while (i > 0): | |
AlignmentA = seq1[i] + AlignmentA | |
AlignmentB = "-" + AlignmentB | |
i -= 1 | |
while (j > 0): | |
AlignmentA = "-" + AlignmentA | |
AlignmentB = seq2[j] + AlignmentB | |
j -= 1 | |
#Similarity | |
lengthA = len(AlignmentA) | |
lengthB = len(AlignmentB) | |
sim1 = "" | |
sim2 = "" | |
length0 = 0 | |
k = 0 | |
total = 0 | |
similarity = 0.0 | |
if (lengthA > lengthB): | |
sim1 = AlignmentA | |
sim2 = AlignmentB | |
length0 = lengthA | |
else: | |
sim1 = AlignmentB | |
sim2 = AlignmentA | |
length0 = lengthB | |
while (k < length0): | |
if (sim1[k] == sim2[k]): | |
total += 1 | |
k += 1 | |
if total == 0: | |
similarity = 0 | |
else: | |
similarity = float(total) / float(length0) * 100.0 | |
tupler = (k, AlignmentA, AlignmentB, similarity) | |
Alignments.append(tupler) | |
# Here is where we pretty the output of the algorithm. | |
self.response.out.write(""" | |
<html> | |
<body> | |
<p> | |
Score: %s | |
</p> | |
<p> | |
Alignment A: %s | |
</p> | |
<p> | |
Alignment B: %s | |
</p> | |
<p> | |
Similarity is: %s | |
</p> | |
</body> | |
</html> | |
"""%(Alignments[0][0], Alignments[0][1],Alignments[0][2],Alignments[0][3])) | |
self.response.out.write(""" | |
<html> | |
<body> | |
<p> | |
Score: %s | |
</p> | |
<p> | |
Alignment A: %s | |
</p> | |
<p> | |
Alignment B: %s | |
</p> | |
<p> | |
Similarity is: %s | |
</p> | |
</body> | |
</html> | |
"""%(Alignments[1][0], Alignments[1][1],Alignments[1][2],Alignments[1][3])) | |
self.response.out.write(""" | |
<html> | |
<body> | |
<p> | |
Score: %s | |
</p> | |
<p> | |
Alignment A: %s | |
</p> | |
<p> | |
Alignment B: %s | |
</p> | |
<p> | |
Similarity is: %s | |
</p> | |
</body> | |
</html> | |
"""%(Alignments[2][0], Alignments[2][1],Alignments[2][2],Alignments[2][3])) | |
except: | |
self.response.out.write(""" | |
<html> | |
<body> | |
<p> | |
Oh dear... Something went wrong. Make sure that your string only contains ATCG and that | |
your gap penalty is an integer (a whole number). Try again. Failing any of those things | |
I'll concede it's me, not you :). Also make sure there are absolutely NO spaces. | |
</p> | |
<a href="http://shortyurlshorty.appspot.com">Try again</a> | |
</body> | |
</html> | |
""") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment