Skip to content

Instantly share code, notes, and snippets.

@sudoaza
Created November 13, 2020 05:48
Show Gist options
  • Save sudoaza/1d838119cec770b21779b4cd4bd9995e to your computer and use it in GitHub Desktop.
Save sudoaza/1d838119cec770b21779b4cd4bd9995e to your computer and use it in GitHub Desktop.
Align inputs and get mayority vote
# Import pairwise2 module
from Bio import pairwise2
# Import format_alignment method
from Bio.pairwise2 import format_alignment
import itertools
import glob
from scipy import stats as s
def get(list, index, default):
try:
return list[index]
except IndexError:
return default
def align(X, Y):
return pairwise2.align.globalms(X, Y, 1, 0, -1, -0.5)
def multi_align(seqs):
scores = {k:0 for k in seqs}
for X, Y in itertools.combinations(seqs, 2):
alignments = align(X, Y)
if len(alignments) == 0:
score = 0
else:
score = alignments[0].score
scores[X] += score
scores[Y] += score
sorted_seqs = [s for s in reversed(sorted(scores, key=scores.get))]
main = None
# Chequear que no loopee aca
while (main != sorted_seqs[0]):
main = sorted_seqs[0]
for i in range(1, len(sorted_seqs)):
X = sorted_seqs[0]
Y = sorted_seqs[i]
alignments = align(X, Y)
max_score = 0
best_align = None
for a in alignments:
if a.score > max_score:
max_score = a.score
best_align = a
sorted_seqs[0] = a.seqA
sorted_seqs[i] = a.seqB
return sorted_seqs
files = []
for filepath in glob.iglob(r'input/*.txt'):
f = open(filepath, "r")
files.append(f.readlines())
maxlines = max([len(lines) for lines in files])
for line in range(maxlines):
lines = [ get(f, line, '').strip() for f in files]
while '' in lines:
lines.remove('')
aligned = multi_align(lines)
#_ = [print(l) for l in aligned]
clean = ''
for i in range(len(aligned[0])):
clean += s.mode( [get(seq,i,'-') for seq in aligned] ).mode[0]
#print("-"*79)
print(clean.replace('-',''))
#print("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment