Created
October 29, 2017 03:54
-
-
Save tinvernizzi/703e94bf1cdd7d8452393c797df2670c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
import sys | |
numpy.set_printoptions(threshold=numpy.nan) | |
seq1 ='MGEIGFTEKQEALVKESWEILKQDIPKYSLHFFSQILEIAPAAKGLFSFLRDSDEVPHNNPKLKAHAVKVFKMTCETAIQLREEGKVVVADTTLQYLGSIHLKSGVIDPHFEVVKEALLRTLKEGGEKYNEEVEGAWSQAYDHLALAIKTEMKQEES' | |
seq2 ='MEKVPGEMEIERRERSEELSEAERKAVQATWARLYANCEDVGVAILVRFFVNFPSAKQYFSQFKHMEEPLEMERSPQLRKHACRVMGALNTVVENLHDPEKVSSVLSLVGKAHALKHKVEPVYFKLSGVILEVIAEEFANDFPPETQRAWAKLRGLIYSHVTAAYKEVGWVQQVPNATTPPATLPSSGP' | |
seq3 ='MVLSAADKNNVKGIFTKIAGHAEEYGAETLERMFTTYPPTKTYFPHFDLSHGSAQIKGHGKKVVAALIEAANHIDDIAGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPAALTPEVHASLDKFLCAVGTVLTAKYR' | |
seq4 ='MGLSDGEWQLVLNVWGKVEADIPGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASEDLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISECIIQVLQSKHPGDFGADAQGAMNKALELFRKDMASNYKELGFQG' | |
seq5 ='MERLESELIRQSWRAVSRSPLEHGTVLFSRLFALEPSLLPLFQYNGRQFSSPEDCLSSPEFLDHIRKVMLVIDAAVTNVEDLSSLEEYLATLGRKHRAVGVRLSSFSTVGESLLYMLEKCLGPDFTPATRTAWSQLYGAVVQAMSRGWDGE' | |
def read_blosum(): | |
with open('BLOSUM62.txt') as f: | |
fileContent = f.readlines() | |
global alignmentScore | |
alignmentScore = [] | |
for i in range (1, len(fileContent)): | |
alignmentScore = alignmentScore + [ fileContent[i].split() ] | |
alignmentScore = [line[1:] for line in alignmentScore] | |
global alignmentIndices | |
alignmentIndices = fileContent[0].split() | |
def get_score(a, b): | |
indice_a = -1 | |
indice_b = -1 | |
for i in range(0, len(alignmentIndices)): | |
if (a == alignmentIndices[i]): | |
indice_a = i | |
if (b == alignmentIndices[i]): | |
indice_b = i | |
if (a == b): | |
return 4 | |
else: | |
return 1 | |
#return int(alignmentScore[indice_a][indice_b]) | |
def penalite_affine(mot1, mot2, a=10, b=1): | |
v = numpy.zeros((len(mot1) + 1, len(mot2) + 1)).astype(int) # Matrice des couts des meilleurs alignements qui depend des 3 autres matrices suivantes : | |
e = numpy.zeros((len(mot1) + 1, len(mot2) + 1)).astype(int) # Matrice des couts des meilleurs alignements entre xi et un gap | |
f = numpy.zeros((len(mot1) + 1, len(mot2) + 1)).astype(int) # Matrice des couts des meilleurs alignements entre yj et un gap | |
g = numpy.zeros((len(mot1) + 1, len(mot2) + 1)).astype(int) # Matrice des couts des meilleurs alignements entre xi et yj | |
directions = numpy.zeros((len(mot1) + 1, len(mot2) + 1, 3)).astype(int) # Stockage des fleches de directions | |
#Initialisation des matrices | |
for i in range(1, (len(mot1) + 1)): | |
e[i][0] = - 99 | |
f[i][0] = - a - i * b | |
g[i][0] = - 99 | |
v[i][0] = - a - i * b | |
directions[i][0] = [0, 0, 1] | |
for j in range(1, (len(mot2) + 1)): | |
e[0][j] = - a - j * b | |
f[0][j] = - 99 | |
g[0][j] = - 99 | |
v[0][j] = - a - j * b | |
directions[0][j] = [1, 0, 0] | |
for i in range(1, (len(mot1) + 1)): | |
for j in range(1, (len(mot2) + 1)): | |
g[i][j] = v[i - 1][j - 1] + get_score(mot1[i - 1], mot2[j - 1]) | |
f[i][j] = max(f[i - 1][j] - (i * b), v[i - 1][j] - (a + (i * b)) ) | |
e[i][j] = max(e[i][j - 1] - (j * b), v[i][j - 1] - (a + (j * b)) ) | |
maxi = max(g[i][j], f[i][j], e[i][j]) | |
if (maxi == g[i][j]): | |
directions[i][j] = [0, 1, 0] | |
elif (maxi == f[i][j]): | |
directions[i][j] = [1, 0, 0] | |
elif (maxi == e[i][j]): | |
directions[i][j] = [0, 0, 1] | |
v[i][j] = maxi | |
str1 = '' | |
str2 = '' | |
x = len(mot1) - 1 | |
y = len(mot2) - 1 | |
while (x != -1 and y != -1): | |
if numpy.array_equal(directions[x][y], [0, 0, 1]): | |
str1 = str1 + mot1[x] | |
str2 = str2 + "_" | |
x = x - 1 | |
elif numpy.array_equal(directions[x][y], [0, 1, 0]): #diagonale | |
str1 = str1 + mot1[x] | |
str2 = str2 + mot2[y] | |
x = x - 1 | |
y = y - 1 | |
elif numpy.array_equal(directions[x][y], [1, 0, 0]): #haut | |
str1 = str1 + "_" | |
str2 = str2 + mot2[y] | |
y = y - 1 | |
if numpy.array_equal(directions[x][y], [0, 0, 0]): | |
x = x - 1 | |
y = y - 1 | |
while (x >= 0): | |
str1 = str1 + mot1[x] | |
str2 = str2 + "_" | |
x = x - 1 | |
while (y >= 0): | |
str1 = str1 + "_" | |
str2 = str2 + mot2[y] | |
y = y - 1 | |
print v | |
print directions | |
print g | |
print f | |
print e | |
print 'str1 : ' + str1[::-1] | |
print 'str2 : ' + str2[::-1] | |
print 'Score : ' + str(v[len(mot1)][len(mot2)]) | |
read_blosum() | |
if (len(sys.argv) == 3): | |
penalite_affine(sys.argv[1], sys.argv[2]) | |
elif (len(sys.argv) == 1): | |
try: | |
with open('sequences.fasta') as f: | |
sequenceFileContent = f.readlines() | |
except: | |
print 'Problem with file. Is the file sequences.fasta in the same folder as this python file ?' | |
sequences = [] | |
sequenceNumber = 0 | |
seq = '' | |
for i in range(len(sequenceFileContent)): | |
if (sequenceFileContent[i][0] == '>'): | |
if (sequenceNumber != 0): | |
sequences.append(seq) | |
sequenceNumber = sequenceNumber + 1 | |
seq = '' | |
else: | |
seq = seq + sequenceFileContent[i].replace('\n','') | |
if (seq != ''): | |
sequences.append(seq) | |
for i in range (len(sequences)): | |
for j in range (len(sequences)): | |
penalite_affine(sequences[i], sequences[j]) | |
else: | |
print 'Usage :\n' + ' python _.py <seq1> <seq2>\n' + ' python _.py <filename>' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment