Skip to content

Instantly share code, notes, and snippets.

@tinvernizzi
Created October 29, 2017 03:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tinvernizzi/703e94bf1cdd7d8452393c797df2670c to your computer and use it in GitHub Desktop.
Save tinvernizzi/703e94bf1cdd7d8452393c797df2670c to your computer and use it in GitHub Desktop.
import numpy
import sys
numpy.set_printoptions(threshold=numpy.nan)
seq1 ='MGEIGFTEKQEALVKESWEILKQDIPKYSLHFFSQILEIAPAAKGLFSFLRDSDEVPHNNPKLKAHAVKVFKMTCETAIQLREEGKVVVADTTLQYLGSIHLKSGVIDPHFEVVKEALLRTLKEGGEKYNEEVEGAWSQAYDHLALAIKTEMKQEES'
seq2 ='MEKVPGEMEIERRERSEELSEAERKAVQATWARLYANCEDVGVAILVRFFVNFPSAKQYFSQFKHMEEPLEMERSPQLRKHACRVMGALNTVVENLHDPEKVSSVLSLVGKAHALKHKVEPVYFKLSGVILEVIAEEFANDFPPETQRAWAKLRGLIYSHVTAAYKEVGWVQQVPNATTPPATLPSSGP'
seq3 ='MVLSAADKNNVKGIFTKIAGHAEEYGAETLERMFTTYPPTKTYFPHFDLSHGSAQIKGHGKKVVAALIEAANHIDDIAGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPAALTPEVHASLDKFLCAVGTVLTAKYR'
seq4 ='MGLSDGEWQLVLNVWGKVEADIPGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASEDLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISECIIQVLQSKHPGDFGADAQGAMNKALELFRKDMASNYKELGFQG'
seq5 ='MERLESELIRQSWRAVSRSPLEHGTVLFSRLFALEPSLLPLFQYNGRQFSSPEDCLSSPEFLDHIRKVMLVIDAAVTNVEDLSSLEEYLATLGRKHRAVGVRLSSFSTVGESLLYMLEKCLGPDFTPATRTAWSQLYGAVVQAMSRGWDGE'
def read_blosum():
with open('BLOSUM62.txt') as f:
fileContent = f.readlines()
global alignmentScore
alignmentScore = []
for i in range (1, len(fileContent)):
alignmentScore = alignmentScore + [ fileContent[i].split() ]
alignmentScore = [line[1:] for line in alignmentScore]
global alignmentIndices
alignmentIndices = fileContent[0].split()
def get_score(a, b):
indice_a = -1
indice_b = -1
for i in range(0, len(alignmentIndices)):
if (a == alignmentIndices[i]):
indice_a = i
if (b == alignmentIndices[i]):
indice_b = i
if (a == b):
return 4
else:
return 1
#return int(alignmentScore[indice_a][indice_b])
def penalite_affine(mot1, mot2, a=10, b=1):
v = numpy.zeros((len(mot1) + 1, len(mot2) + 1)).astype(int) # Matrice des couts des meilleurs alignements qui depend des 3 autres matrices suivantes :
e = numpy.zeros((len(mot1) + 1, len(mot2) + 1)).astype(int) # Matrice des couts des meilleurs alignements entre xi et un gap
f = numpy.zeros((len(mot1) + 1, len(mot2) + 1)).astype(int) # Matrice des couts des meilleurs alignements entre yj et un gap
g = numpy.zeros((len(mot1) + 1, len(mot2) + 1)).astype(int) # Matrice des couts des meilleurs alignements entre xi et yj
directions = numpy.zeros((len(mot1) + 1, len(mot2) + 1, 3)).astype(int) # Stockage des fleches de directions
#Initialisation des matrices
for i in range(1, (len(mot1) + 1)):
e[i][0] = - 99
f[i][0] = - a - i * b
g[i][0] = - 99
v[i][0] = - a - i * b
directions[i][0] = [0, 0, 1]
for j in range(1, (len(mot2) + 1)):
e[0][j] = - a - j * b
f[0][j] = - 99
g[0][j] = - 99
v[0][j] = - a - j * b
directions[0][j] = [1, 0, 0]
for i in range(1, (len(mot1) + 1)):
for j in range(1, (len(mot2) + 1)):
g[i][j] = v[i - 1][j - 1] + get_score(mot1[i - 1], mot2[j - 1])
f[i][j] = max(f[i - 1][j] - (i * b), v[i - 1][j] - (a + (i * b)) )
e[i][j] = max(e[i][j - 1] - (j * b), v[i][j - 1] - (a + (j * b)) )
maxi = max(g[i][j], f[i][j], e[i][j])
if (maxi == g[i][j]):
directions[i][j] = [0, 1, 0]
elif (maxi == f[i][j]):
directions[i][j] = [1, 0, 0]
elif (maxi == e[i][j]):
directions[i][j] = [0, 0, 1]
v[i][j] = maxi
str1 = ''
str2 = ''
x = len(mot1) - 1
y = len(mot2) - 1
while (x != -1 and y != -1):
if numpy.array_equal(directions[x][y], [0, 0, 1]):
str1 = str1 + mot1[x]
str2 = str2 + "_"
x = x - 1
elif numpy.array_equal(directions[x][y], [0, 1, 0]): #diagonale
str1 = str1 + mot1[x]
str2 = str2 + mot2[y]
x = x - 1
y = y - 1
elif numpy.array_equal(directions[x][y], [1, 0, 0]): #haut
str1 = str1 + "_"
str2 = str2 + mot2[y]
y = y - 1
if numpy.array_equal(directions[x][y], [0, 0, 0]):
x = x - 1
y = y - 1
while (x >= 0):
str1 = str1 + mot1[x]
str2 = str2 + "_"
x = x - 1
while (y >= 0):
str1 = str1 + "_"
str2 = str2 + mot2[y]
y = y - 1
print v
print directions
print g
print f
print e
print 'str1 : ' + str1[::-1]
print 'str2 : ' + str2[::-1]
print 'Score : ' + str(v[len(mot1)][len(mot2)])
print
read_blosum()
if (len(sys.argv) == 3):
penalite_affine(sys.argv[1], sys.argv[2])
elif (len(sys.argv) == 1):
try:
with open('sequences.fasta') as f:
sequenceFileContent = f.readlines()
except:
print 'Problem with file. Is the file sequences.fasta in the same folder as this python file ?'
sequences = []
sequenceNumber = 0
seq = ''
for i in range(len(sequenceFileContent)):
if (sequenceFileContent[i][0] == '>'):
if (sequenceNumber != 0):
sequences.append(seq)
sequenceNumber = sequenceNumber + 1
seq = ''
else:
seq = seq + sequenceFileContent[i].replace('\n','')
if (seq != ''):
sequences.append(seq)
for i in range (len(sequences)):
for j in range (len(sequences)):
penalite_affine(sequences[i], sequences[j])
else:
print 'Usage :\n' + ' python _.py <seq1> <seq2>\n' + ' python _.py <filename>'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment