Created
November 13, 2020 05:48
-
-
Save sudoaza/1d838119cec770b21779b4cd4bd9995e to your computer and use it in GitHub Desktop.
Align inputs and get mayority vote
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import pairwise2 module | |
from Bio import pairwise2 | |
# Import format_alignment method | |
from Bio.pairwise2 import format_alignment | |
import itertools | |
import glob | |
from scipy import stats as s | |
def get(list, index, default): | |
try: | |
return list[index] | |
except IndexError: | |
return default | |
def align(X, Y): | |
return pairwise2.align.globalms(X, Y, 1, 0, -1, -0.5) | |
def multi_align(seqs): | |
scores = {k:0 for k in seqs} | |
for X, Y in itertools.combinations(seqs, 2): | |
alignments = align(X, Y) | |
if len(alignments) == 0: | |
score = 0 | |
else: | |
score = alignments[0].score | |
scores[X] += score | |
scores[Y] += score | |
sorted_seqs = [s for s in reversed(sorted(scores, key=scores.get))] | |
main = None | |
# Chequear que no loopee aca | |
while (main != sorted_seqs[0]): | |
main = sorted_seqs[0] | |
for i in range(1, len(sorted_seqs)): | |
X = sorted_seqs[0] | |
Y = sorted_seqs[i] | |
alignments = align(X, Y) | |
max_score = 0 | |
best_align = None | |
for a in alignments: | |
if a.score > max_score: | |
max_score = a.score | |
best_align = a | |
sorted_seqs[0] = a.seqA | |
sorted_seqs[i] = a.seqB | |
return sorted_seqs | |
files = [] | |
for filepath in glob.iglob(r'input/*.txt'): | |
f = open(filepath, "r") | |
files.append(f.readlines()) | |
maxlines = max([len(lines) for lines in files]) | |
for line in range(maxlines): | |
lines = [ get(f, line, '').strip() for f in files] | |
while '' in lines: | |
lines.remove('') | |
aligned = multi_align(lines) | |
#_ = [print(l) for l in aligned] | |
clean = '' | |
for i in range(len(aligned[0])): | |
clean += s.mode( [get(seq,i,'-') for seq in aligned] ).mode[0] | |
#print("-"*79) | |
print(clean.replace('-','')) | |
#print("") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment