Skip to content

Instantly share code, notes, and snippets.

@elais
Created May 7, 2013 16:58
Show Gist options
  • Save elais/5534225 to your computer and use it in GitHub Desktop.
Save elais/5534225 to your computer and use it in GitHub Desktop.
BLOSUM62
class Blosum62(webapp.RequestHandler):
def post(self):
try:
gap = int(self.request.get('Gap')) # gap penalty... or bonus if you'd prefer.
seq1 = str(self.request.get('Sequence1')) # first input sequence
seq2 = str(self.request.get('Sequence2')) # second input sequence
seq1 = seq1.upper()
seq2 = seq2.upper()
seq1 = ' ' + ''.join(seq1.split())
seq2 = ' ' + ''.join(seq2.split())
seq1 = list(seq1)
seq2 = list(seq2)
M = [[0 for i in seq2] for j in seq1] # An initial 2d array filled with zeros
I = range(len(seq1)) # this is pythonic way to iterate through the sequences, trust me.
J = range(len(seq2)) #
Alignments = []
# Similarity scores based off of wikipedia example. Not anymore, Dr. Hill killed that dream.
similarityMatrix = \
{'C': {'C': 9, 'S': -1, 'T': -1, 'P': -3,'A': 0, 'G': -3, 'N': -3, 'D': -3, 'E': -4, 'Q': -3, 'H': -3, 'R': -3, 'K': -3, 'M': -1, 'I': -1, 'L': -1, 'V': -1, 'F': -2, 'Y': -2, 'W': -2},
'S': {'C': -1, 'S': 4,'T': 1, 'P': -1,'A': 1,'G': 0,'N': 1,'D': 0,'E': 0,'Q': 0, 'H': -1, 'R': -1,'K': 0, 'M': -1, 'I': -2, 'L': -2, 'V': -2, 'F': -2, 'Y': -2, 'W': -3},
'T': {'C': -1,'S': 1,'T': 4,'P': 1, 'A': -1,'G': 1,'N': 0,'D': 1,'E': 0,'Q': 0,'H': 0, 'R': -1,'K': 0, 'M': -1, 'I': -2, 'L': -2, 'V': -2, 'F': -2, 'Y': -2, 'W': -3},
'P': {'C': -3, 'S': -1,'T': 1,'P': 7, 'A': -1, 'G': -2, 'N': -1, 'D': -1, 'E': -1, 'Q': -1, 'H': -2, 'R': -2, 'K': -1, 'M': -2, 'I': -3, 'L': -3, 'V': -2, 'F': -4, 'Y': -3, 'W': -4},
'A': {'C': 0,'S': 1, 'T': -1, 'P': -1,'A': 4,'G': 0, 'N': -1, 'D': -2, 'E': -1, 'Q': -1, 'H': -2, 'R': -1, 'K': -1, 'M': -1, 'I': -1, 'L': -1, 'V': -2, 'F': -2, 'Y': -2, 'W': -3},
'G': {'C': -3,'S': 0,'T': 1, 'P': -2,'A': 0,'G': 6, 'N': -2, 'D': -1, 'E': -2, 'Q': -2, 'H': -2, 'R': -2, 'K': -2, 'M': -3, 'I': -4, 'L': -4,'V': 0, 'F': -3, 'Y': -3, 'W': -2},
'N': {'C': -3,'S': 1,'T': 0, 'P': -2, 'A': -2,'G': 0,'N': 6,'D': 1,'E': 0,'Q': 0, 'H': -1,'R': 0,'K': 0, 'M': -2, 'I': -3, 'L': -3, 'V': -3, 'F': -3, 'Y': -2, 'W': -4},
'D': {'C': -3,'S': 0,'T': 1, 'P': -1, 'A': -2, 'G': -1,'N': 1,'D': 6,'E': 2,'Q': 0, 'H': -1, 'R': -2, 'K': -1, 'M': -3, 'I': -3, 'L': -4, 'V': -3, 'F': -3, 'Y': -3, 'W': -4},
'E': {'C': -4,'S': 0,'T': 0, 'P': -1, 'A': -1, 'G': -2,'N': 0,'D': 2,'E': 5,'Q': 2,'H': 0,'R': 0,'K': 1, 'M': -2, 'I': -3, 'L': -3, 'V': -3, 'F': -3, 'Y': -2, 'W': -3},
'Q': {'C': -3,'S': 0,'T': 0, 'P': -1, 'A': -1, 'G': -2,'N': 0,'D': 0,'E': 2,'Q': 5,'H': 0,'R': 1,'K': 1,'M': 0, 'I': -3, 'L': -2, 'V': -2, 'F': -3, 'Y': -1, 'W': -2},
'H': {'C': -3, 'S': -1,'T': 0, 'P': -2, 'A': -2, 'G': -2,'N': 1,'D': 1,'E': 0,'Q': 0,'H': 8,'R': 0, 'K': -1, 'M': -2, 'I': -3, 'L': -3, 'V': -2, 'F': -1,'Y': 2, 'W': -2},
'R': {'C': -3, 'S': -1, 'T': -1, 'P': -2, 'A': -1, 'G': -2,'N': 0, 'D': -2,'E': 0,'Q': 1,'H': 0,'R': 5,'K': 2, 'M': -1, 'I': -3, 'L': -2, 'V': -3, 'F': -3, 'Y': -2, 'W': -3},
'K': {'C': -3,'S': 0,'T': 0, 'P': -1, 'A': -1, 'G': -2,'N': 0, 'D': -1,'E': 1,'Q': 1, 'H': -1,'R': 2,'K': 5, 'M': -1, 'I': -3, 'L': -2, 'V': -3, 'F': -3, 'Y': -2, 'W': -3},
'M': {'C': -1, 'S': -1, 'T': -1, 'P': -2, 'A': -1, 'G': -3, 'N': -2, 'D': -3, 'E': -2,'Q': 0, 'H': -2, 'R': -1, 'K': -1,'M': 5,'I': 1,'L': 2, 'V': -2,'F': 0, 'Y': -1, 'W': -1},
'I': {'C': -1, 'S': -2, 'T': -2, 'P': -3, 'A': -1, 'G': -4, 'N': -3, 'D': -3, 'E': -3, 'Q': -3, 'H': -3, 'R': -3, 'K': -3,'M': 1,'I': 4,'L': 2,'V': 1,'F': 0, 'Y': -1, 'W': -3},
'L': {'C': -1, 'S': -2, 'T': -2, 'P': -3, 'A': -1, 'G': -4, 'N': -3, 'D': -4, 'E': -3, 'Q': -2, 'H': -3, 'R': -2, 'K': -2,'M': 2,'I': 2,'L': 4,'V': 3,'F': 0, 'Y': -1, 'W': -2},
'V': {'C': -1, 'S': -2, 'T': -2, 'P': -2,'A': 0, 'G': -3, 'N': -3, 'D': -3, 'E': -2, 'Q': -2, 'H': -3, 'R': -3, 'K': -2,'M': 1,'I': 3,'L': 1,'V': 4, 'F': -1, 'Y': -1, 'W': -3},
'F': {'C': -2, 'S': -2, 'T': -2, 'P': -4, 'A': -2, 'G': -3, 'N': -3, 'D': -3, 'E': -3, 'Q': -3, 'H': -1, 'R': -3, 'K': -3,'M': 0,'I': 0,'L': 0, 'V': -1,'F': 6,'Y': 3,'W': 1},
'Y': {'C': -2, 'S': -2, 'T': -2, 'P': -3, 'A': -2, 'G': -3, 'N': -2, 'D': -3, 'E': -2, 'Q': -1,'H': 2, 'R': -2, 'K': -2, 'M': -1, 'I': -1, 'L': -1, 'V': -1,'F': 3,'Y': 7,'W': 2},
'W': {'C': -2, 'S': -3, 'T': -3, 'P': -4, 'A': -3, 'G': -2, 'N': -4, 'D': -4, 'E': -3, 'Q': -2, 'H': -2, 'R': -3, 'K': -3, 'M': -1, 'I': -3, 'L': -2, 'V': -3,'F': 1,'Y': 2, 'W': 11} }
#Initialization of 2d array
for i in I:
M[i][0] = max((gap * i), 0)
for j in J:
M[0][j] = max((gap* i), 0)
#Scoring
for i in I[1:]:
for j in J[1:]:
Match = M[i-1][j-1] + similarityMatrix[seq1[i]][seq2[j]]
Delete = M[i-1][j] + gap
Insert = M[i][j-1] + gap
M[i][j] = max(Match, Insert, Delete)
for i in I[1:]:
for j in J[1:]:
if M[i][j] < 1:
M[i][j] = 0
heap = []
for i in I[1:]:
heap = heap + M[i]
scoreList = heapq.nlargest(len(heap), heap)
# Traceback
for k in range(3):
AA = ""
AB = ""
i = 0
j = 0
countA = 0
countB = 0
AlignmentA = ""
AlignmentB = ""
for m in I[::-1]:
for n in J[::-1]:
if (M[m][n] == scoreList[0]):
i = m
j = n
# something
while ( i > 0 and j > 0 ):
Score = M[i][j]
if Score == 0:
break
else:
ScoreDiag = M[i - 1][j - 1]
ScoreUp = M[i][j-1]
ScoreLeft = M[i-1][j]
if (Score == ScoreDiag + similarityMatrix[seq1[i]][seq2[j]]):
AlignmentA = seq1[i] + AlignmentA
AlignmentB = seq2[j] + AlignmentB
i -= 1
j -= 1
elif (Score == ScoreLeft + gap):
# AlignmentA = seq1[i] + AlignmentA
# AlignmentB = "-" + AlignmentB
i -= 1
# countB += 1
elif (Score == ScoreUp + gap):
# AlignmentA = "-" + AlignmentA
# AlignmentB = seq2[j] + AlignmentB
j -= 1
# countA += 1
else:
print("algorithm error?")
M[i][j] == 0
while (i > 0):
# AlignmentA = seq1[i] + AlignmentA
# AlignmentB = "-" + AlignmentB
countB += 1
i -= 1
while (j > 0):
# AlignmentA = "-" + AlignmentA
# AlignmentB = seq2[j] + AlignmentB
countA += 1
j -= 1
#Similarity
# AlignmentA = AlignmentA.replace("-", "")
# AlignmentB = AlignmentB.replace("-", "")
lengthA = len(AlignmentA)
lengthB = len(AlignmentB)
sim1 = ""
sim2 = ""
length0 = 0
k = 0
total = 0
similarity = 0.0
if (lengthA > lengthB):
sim1 = AlignmentA
sim2 = AlignmentB
length0 = lengthA
else:
sim1 = AlignmentB
sim2 = AlignmentA
length0 = lengthB
while (k < length0):
if (sim1[k] == sim2[k]):
total += 1
k += 1
if total == 0:
similarity = 0
else:
similarity = float(total) / float(length0) * 100.0
topScore = scoreList[0]
scoreList = filter(lambda a: a != topScore, scoreList)
tupler = [scoreList[0], countA, AlignmentA, countB, AlignmentB, similarity]
Alignments.append(tupler)
# Here is where we pretty the output of the algorithm.
self.response.out.write("""
<html>
<body>
<p>
Score: %s
</p>
<p>
Starting at position %s in Alignment A: %s
</p>
<p>
Starting at position %s in Alignment B: %s
</p>
<p>
</p>
</body>
</html>
"""%(Alignments[0][0], Alignments[0][1],Alignments[0][2],Alignments[0][3], Alignments[0][4]))
self.response.out.write("""
<html>
<body>
<p>
Score: %s
</p>
<p>
Starting at position %s in Alignment A: %s
</p>
<p>
Starting at position %s in Alignment B: %s
</p>
<p>
</p>
</body>
</html>
"""%(Alignments[1][0], Alignments[1][1],Alignments[1][2],Alignments[1][3], Alignments[1][4]))
self.response.out.write("""
<html>
<body>
<p>
Score: %s
</p>
<p>
Starting at position %s in Alignment A: %s
</p>
<p>
Starting at position %s in Alignment B: %s
</p>
<p>
</p>
</body>
</html>
"""%(Alignments[2][0], Alignments[0][1],Alignments[0][2],Alignments[0][3], Alignments[0][4]))
except:
self.response.out.write("""
<html>
<body>
<p>
Oh dear... Something went wrong. Make sure that your string only contains ATCG and that
your gap penalty is an integer (a whole number). Try again. Failing any of those things
I'll concede it's me, not you :). Also make sure there are absolutely NO spaces.
</p>
<a href="http://shortyurlshorty.appspot.com">Try again</a>
</body>
</html>
""")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment