Created
May 7, 2013 16:58
-
-
Save elais/5534225 to your computer and use it in GitHub Desktop.
BLOSUM62
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Blosum62(webapp.RequestHandler): | |
def post(self): | |
try: | |
gap = int(self.request.get('Gap')) # gap penalty... or bonus if you'd prefer. | |
seq1 = str(self.request.get('Sequence1')) # first input sequence | |
seq2 = str(self.request.get('Sequence2')) # second input sequence | |
seq1 = seq1.upper() | |
seq2 = seq2.upper() | |
seq1 = ' ' + ''.join(seq1.split()) | |
seq2 = ' ' + ''.join(seq2.split()) | |
seq1 = list(seq1) | |
seq2 = list(seq2) | |
M = [[0 for i in seq2] for j in seq1] # An initial 2d array filled with zeros | |
I = range(len(seq1)) # this is pythonic way to iterate through the sequences, trust me. | |
J = range(len(seq2)) # | |
Alignments = [] | |
# Similarity scores based off of wikipedia example. Not anymore, Dr. Hill killed that dream. | |
similarityMatrix = \ | |
{'C': {'C': 9, 'S': -1, 'T': -1, 'P': -3,'A': 0, 'G': -3, 'N': -3, 'D': -3, 'E': -4, 'Q': -3, 'H': -3, 'R': -3, 'K': -3, 'M': -1, 'I': -1, 'L': -1, 'V': -1, 'F': -2, 'Y': -2, 'W': -2}, | |
'S': {'C': -1, 'S': 4,'T': 1, 'P': -1,'A': 1,'G': 0,'N': 1,'D': 0,'E': 0,'Q': 0, 'H': -1, 'R': -1,'K': 0, 'M': -1, 'I': -2, 'L': -2, 'V': -2, 'F': -2, 'Y': -2, 'W': -3}, | |
'T': {'C': -1,'S': 1,'T': 4,'P': 1, 'A': -1,'G': 1,'N': 0,'D': 1,'E': 0,'Q': 0,'H': 0, 'R': -1,'K': 0, 'M': -1, 'I': -2, 'L': -2, 'V': -2, 'F': -2, 'Y': -2, 'W': -3}, | |
'P': {'C': -3, 'S': -1,'T': 1,'P': 7, 'A': -1, 'G': -2, 'N': -1, 'D': -1, 'E': -1, 'Q': -1, 'H': -2, 'R': -2, 'K': -1, 'M': -2, 'I': -3, 'L': -3, 'V': -2, 'F': -4, 'Y': -3, 'W': -4}, | |
'A': {'C': 0,'S': 1, 'T': -1, 'P': -1,'A': 4,'G': 0, 'N': -1, 'D': -2, 'E': -1, 'Q': -1, 'H': -2, 'R': -1, 'K': -1, 'M': -1, 'I': -1, 'L': -1, 'V': -2, 'F': -2, 'Y': -2, 'W': -3}, | |
'G': {'C': -3,'S': 0,'T': 1, 'P': -2,'A': 0,'G': 6, 'N': -2, 'D': -1, 'E': -2, 'Q': -2, 'H': -2, 'R': -2, 'K': -2, 'M': -3, 'I': -4, 'L': -4,'V': 0, 'F': -3, 'Y': -3, 'W': -2}, | |
'N': {'C': -3,'S': 1,'T': 0, 'P': -2, 'A': -2,'G': 0,'N': 6,'D': 1,'E': 0,'Q': 0, 'H': -1,'R': 0,'K': 0, 'M': -2, 'I': -3, 'L': -3, 'V': -3, 'F': -3, 'Y': -2, 'W': -4}, | |
'D': {'C': -3,'S': 0,'T': 1, 'P': -1, 'A': -2, 'G': -1,'N': 1,'D': 6,'E': 2,'Q': 0, 'H': -1, 'R': -2, 'K': -1, 'M': -3, 'I': -3, 'L': -4, 'V': -3, 'F': -3, 'Y': -3, 'W': -4}, | |
'E': {'C': -4,'S': 0,'T': 0, 'P': -1, 'A': -1, 'G': -2,'N': 0,'D': 2,'E': 5,'Q': 2,'H': 0,'R': 0,'K': 1, 'M': -2, 'I': -3, 'L': -3, 'V': -3, 'F': -3, 'Y': -2, 'W': -3}, | |
'Q': {'C': -3,'S': 0,'T': 0, 'P': -1, 'A': -1, 'G': -2,'N': 0,'D': 0,'E': 2,'Q': 5,'H': 0,'R': 1,'K': 1,'M': 0, 'I': -3, 'L': -2, 'V': -2, 'F': -3, 'Y': -1, 'W': -2}, | |
'H': {'C': -3, 'S': -1,'T': 0, 'P': -2, 'A': -2, 'G': -2,'N': 1,'D': 1,'E': 0,'Q': 0,'H': 8,'R': 0, 'K': -1, 'M': -2, 'I': -3, 'L': -3, 'V': -2, 'F': -1,'Y': 2, 'W': -2}, | |
'R': {'C': -3, 'S': -1, 'T': -1, 'P': -2, 'A': -1, 'G': -2,'N': 0, 'D': -2,'E': 0,'Q': 1,'H': 0,'R': 5,'K': 2, 'M': -1, 'I': -3, 'L': -2, 'V': -3, 'F': -3, 'Y': -2, 'W': -3}, | |
'K': {'C': -3,'S': 0,'T': 0, 'P': -1, 'A': -1, 'G': -2,'N': 0, 'D': -1,'E': 1,'Q': 1, 'H': -1,'R': 2,'K': 5, 'M': -1, 'I': -3, 'L': -2, 'V': -3, 'F': -3, 'Y': -2, 'W': -3}, | |
'M': {'C': -1, 'S': -1, 'T': -1, 'P': -2, 'A': -1, 'G': -3, 'N': -2, 'D': -3, 'E': -2,'Q': 0, 'H': -2, 'R': -1, 'K': -1,'M': 5,'I': 1,'L': 2, 'V': -2,'F': 0, 'Y': -1, 'W': -1}, | |
'I': {'C': -1, 'S': -2, 'T': -2, 'P': -3, 'A': -1, 'G': -4, 'N': -3, 'D': -3, 'E': -3, 'Q': -3, 'H': -3, 'R': -3, 'K': -3,'M': 1,'I': 4,'L': 2,'V': 1,'F': 0, 'Y': -1, 'W': -3}, | |
'L': {'C': -1, 'S': -2, 'T': -2, 'P': -3, 'A': -1, 'G': -4, 'N': -3, 'D': -4, 'E': -3, 'Q': -2, 'H': -3, 'R': -2, 'K': -2,'M': 2,'I': 2,'L': 4,'V': 3,'F': 0, 'Y': -1, 'W': -2}, | |
'V': {'C': -1, 'S': -2, 'T': -2, 'P': -2,'A': 0, 'G': -3, 'N': -3, 'D': -3, 'E': -2, 'Q': -2, 'H': -3, 'R': -3, 'K': -2,'M': 1,'I': 3,'L': 1,'V': 4, 'F': -1, 'Y': -1, 'W': -3}, | |
'F': {'C': -2, 'S': -2, 'T': -2, 'P': -4, 'A': -2, 'G': -3, 'N': -3, 'D': -3, 'E': -3, 'Q': -3, 'H': -1, 'R': -3, 'K': -3,'M': 0,'I': 0,'L': 0, 'V': -1,'F': 6,'Y': 3,'W': 1}, | |
'Y': {'C': -2, 'S': -2, 'T': -2, 'P': -3, 'A': -2, 'G': -3, 'N': -2, 'D': -3, 'E': -2, 'Q': -1,'H': 2, 'R': -2, 'K': -2, 'M': -1, 'I': -1, 'L': -1, 'V': -1,'F': 3,'Y': 7,'W': 2}, | |
'W': {'C': -2, 'S': -3, 'T': -3, 'P': -4, 'A': -3, 'G': -2, 'N': -4, 'D': -4, 'E': -3, 'Q': -2, 'H': -2, 'R': -3, 'K': -3, 'M': -1, 'I': -3, 'L': -2, 'V': -3,'F': 1,'Y': 2, 'W': 11} } | |
#Initialization of 2d array | |
for i in I: | |
M[i][0] = max((gap * i), 0) | |
for j in J: | |
M[0][j] = max((gap* i), 0) | |
#Scoring | |
for i in I[1:]: | |
for j in J[1:]: | |
Match = M[i-1][j-1] + similarityMatrix[seq1[i]][seq2[j]] | |
Delete = M[i-1][j] + gap | |
Insert = M[i][j-1] + gap | |
M[i][j] = max(Match, Insert, Delete) | |
for i in I[1:]: | |
for j in J[1:]: | |
if M[i][j] < 1: | |
M[i][j] = 0 | |
heap = [] | |
for i in I[1:]: | |
heap = heap + M[i] | |
scoreList = heapq.nlargest(len(heap), heap) | |
# Traceback | |
for k in range(3): | |
AA = "" | |
AB = "" | |
i = 0 | |
j = 0 | |
countA = 0 | |
countB = 0 | |
AlignmentA = "" | |
AlignmentB = "" | |
for m in I[::-1]: | |
for n in J[::-1]: | |
if (M[m][n] == scoreList[0]): | |
i = m | |
j = n | |
# something | |
while ( i > 0 and j > 0 ): | |
Score = M[i][j] | |
if Score == 0: | |
break | |
else: | |
ScoreDiag = M[i - 1][j - 1] | |
ScoreUp = M[i][j-1] | |
ScoreLeft = M[i-1][j] | |
if (Score == ScoreDiag + similarityMatrix[seq1[i]][seq2[j]]): | |
AlignmentA = seq1[i] + AlignmentA | |
AlignmentB = seq2[j] + AlignmentB | |
i -= 1 | |
j -= 1 | |
elif (Score == ScoreLeft + gap): | |
# AlignmentA = seq1[i] + AlignmentA | |
# AlignmentB = "-" + AlignmentB | |
i -= 1 | |
# countB += 1 | |
elif (Score == ScoreUp + gap): | |
# AlignmentA = "-" + AlignmentA | |
# AlignmentB = seq2[j] + AlignmentB | |
j -= 1 | |
# countA += 1 | |
else: | |
print("algorithm error?") | |
M[i][j] == 0 | |
while (i > 0): | |
# AlignmentA = seq1[i] + AlignmentA | |
# AlignmentB = "-" + AlignmentB | |
countB += 1 | |
i -= 1 | |
while (j > 0): | |
# AlignmentA = "-" + AlignmentA | |
# AlignmentB = seq2[j] + AlignmentB | |
countA += 1 | |
j -= 1 | |
#Similarity | |
# AlignmentA = AlignmentA.replace("-", "") | |
# AlignmentB = AlignmentB.replace("-", "") | |
lengthA = len(AlignmentA) | |
lengthB = len(AlignmentB) | |
sim1 = "" | |
sim2 = "" | |
length0 = 0 | |
k = 0 | |
total = 0 | |
similarity = 0.0 | |
if (lengthA > lengthB): | |
sim1 = AlignmentA | |
sim2 = AlignmentB | |
length0 = lengthA | |
else: | |
sim1 = AlignmentB | |
sim2 = AlignmentA | |
length0 = lengthB | |
while (k < length0): | |
if (sim1[k] == sim2[k]): | |
total += 1 | |
k += 1 | |
if total == 0: | |
similarity = 0 | |
else: | |
similarity = float(total) / float(length0) * 100.0 | |
topScore = scoreList[0] | |
scoreList = filter(lambda a: a != topScore, scoreList) | |
tupler = [scoreList[0], countA, AlignmentA, countB, AlignmentB, similarity] | |
Alignments.append(tupler) | |
# Here is where we pretty the output of the algorithm. | |
self.response.out.write(""" | |
<html> | |
<body> | |
<p> | |
Score: %s | |
</p> | |
<p> | |
Starting at position %s in Alignment A: %s | |
</p> | |
<p> | |
Starting at position %s in Alignment B: %s | |
</p> | |
<p> | |
</p> | |
</body> | |
</html> | |
"""%(Alignments[0][0], Alignments[0][1],Alignments[0][2],Alignments[0][3], Alignments[0][4])) | |
self.response.out.write(""" | |
<html> | |
<body> | |
<p> | |
Score: %s | |
</p> | |
<p> | |
Starting at position %s in Alignment A: %s | |
</p> | |
<p> | |
Starting at position %s in Alignment B: %s | |
</p> | |
<p> | |
</p> | |
</body> | |
</html> | |
"""%(Alignments[1][0], Alignments[1][1],Alignments[1][2],Alignments[1][3], Alignments[1][4])) | |
self.response.out.write(""" | |
<html> | |
<body> | |
<p> | |
Score: %s | |
</p> | |
<p> | |
Starting at position %s in Alignment A: %s | |
</p> | |
<p> | |
Starting at position %s in Alignment B: %s | |
</p> | |
<p> | |
</p> | |
</body> | |
</html> | |
"""%(Alignments[2][0], Alignments[0][1],Alignments[0][2],Alignments[0][3], Alignments[0][4])) | |
except: | |
self.response.out.write(""" | |
<html> | |
<body> | |
<p> | |
Oh dear... Something went wrong. Make sure that your string only contains ATCG and that | |
your gap penalty is an integer (a whole number). Try again. Failing any of those things | |
I'll concede it's me, not you :). Also make sure there are absolutely NO spaces. | |
</p> | |
<a href="http://shortyurlshorty.appspot.com">Try again</a> | |
</body> | |
</html> | |
""") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment