Skip to content

Instantly share code, notes, and snippets.

@pratikone
Last active October 25, 2016 00:58
Show Gist options
  • Save pratikone/8ff6b84319fc9d390a2748542ffea531 to your computer and use it in GitHub Desktop.
Save pratikone/8ff6b84319fc9d390a2748542ffea531 to your computer and use it in GitHub Desktop.
Create blosum matrix with 5 alignments
##########################
# Author : Pratik Anand #
##########################
from sets import Set
from math import log
from decimal import *
from tabulate import tabulate
import copy
import global_alignment_DP as align
import affinity_global_align_DP as affine_align
getcontext().prec = 2
#sequences = [ "SALAK", "EHMFK", "QYCQS", "IRFLQ", "CDAIR" ]
#sequences = [ "KRAAE", "WNAQS", "RDAIE", "RDAIE", "RDAIE" ]
#sequences = [ "LRLLQ", "MSVVE", "NRDRV", "GRDRV", "IGAFY" ]
sequences = [ "AAPMG", "ATSMG", "AAPMG", "AAPMG", "AAPMG" ]
SIZE = 5
PRINT_TABLE = True
charSet = Set()
BLANK = ","
columns = []
def printTable( headerList, table ) :
new_table = copy.deepcopy(table)
for indx,innerRow in enumerate(new_table):
innerRow.insert( 0, headerList[indx])
print tabulate( new_table, headers=headerList, tablefmt="grid" )
for i in xrange(SIZE) :
innerCol = {}
for j in xrange(SIZE) :
char = sequences[j][i]
charSet.add(char) #add to set for making table
if char in innerCol :
innerCol[char] = innerCol[char] + 1
else :
innerCol[char] = 1
columns.append( innerCol )
print columns
combinations = {}
for col_id in xrange(SIZE):
dict_key_size = len(columns[col_id])
for i in xrange(dict_key_size) :
for j in xrange(i, dict_key_size) :
string = columns[col_id].keys()[i] + columns[col_id].keys()[j]
if string[0] == string[1] :
val = columns[col_id][string[0]]
c = (val * (val-1))/2
else :
val1 = columns[col_id][string[0]]
val2 = columns[col_id][string[1]]
c = val1 * val2
if string in combinations :
combinations[string] = combinations[string] + c
else :
combinations[string] = c
print combinations
print sorted(charSet)
c_i_j = []
for row in sorted(charSet) :
innerList = []
for col in sorted(charSet) :
stringCombo = row + col
rev_string_combo = col + row
if stringCombo in combinations or rev_string_combo in combinations : #last bug hopefully,
if stringCombo in combinations : #it wasn't accounting for AB and BA together
key = stringCombo
else :
key = rev_string_combo
val = combinations[ key ]
innerList.append(val)
print str(val) + BLANK,
else :
innerList.append(0) #won't be computed against
print "?" + BLANK,
c_i_j.append( innerList )
print "\n"
if PRINT_TABLE is True :
printTable( list(charSet), c_i_j )
print "============================================================="
q_i_j = []
for i,row in enumerate(sorted(charSet)) :
innerList = []
for j,col in enumerate(sorted(charSet)) :
stringCombo = row + col
val = c_i_j[i][j]
val = Decimal(val)/Decimal(50.0)
innerList.append(val)
print str(val) + BLANK,
q_i_j.append( innerList )
print "\n"
if PRINT_TABLE is True :
printTable( list(charSet), q_i_j )
print "============================================================="
p_i_j = {}
tot = 0
for i,row in enumerate(sorted(charSet)) :
sum = 0
for j,col in enumerate(sorted(charSet)) :
if row == col :
val = q_i_j[i][j]
else:
val = q_i_j[i][j]/2
sum = sum + val
#print row + " index : " + str(i) + " " + str(sum) + " " + str(val)
p_i_j[row] = sum
tot = tot+sum
print p_i_j
print tot
print "============================================================="
e_i_j = []
for i,row in enumerate(sorted(charSet)) :
innerList = []
for j,col in enumerate(sorted(charSet)) :
if row == col :
val = p_i_j[row]**2
else:
val = 2*p_i_j[row]*p_i_j[col]
print str(val) + BLANK,
innerList.append(val)
#print row + " index : " + str(i) + " " + str(sum) + " " + str(val)
e_i_j.append(innerList)
print "\n"
if PRINT_TABLE is True :
printTable( list(charSet), e_i_j )
print "============================================================="
blosum = []
for i,row in enumerate(sorted(charSet)) :
innerList = []
for j,col in enumerate(sorted(charSet)) :
if Decimal( e_i_j[i][j]) != Decimal(0.0) and Decimal( q_i_j[i][j]) != Decimal(0.0) :
val = int(round(2*log( Decimal( q_i_j[i][j] )/ Decimal( e_i_j[i][j] ), 2)))
else : val = "?"
print str(val) + BLANK,
innerList.append(val)
#print row + " index : " + str(i) + " " + str(sum) + " " + str(val)
blosum.append(innerList)
print "\n"
print "============================================================="
if PRINT_TABLE is True :
printTable( list(charSet), blosum )
print "============================================================="
affine_align.USE_BLOSUM = True
if True :
for firstSeq in xrange(len(sequences) - 1) :
for secondSeq in xrange(firstSeq+1, len(sequences)) :
affine_align.sequence_align(sequences[firstSeq], sequences[secondSeq], charSet, blosum)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment