Skip to content

Instantly share code, notes, and snippets.

@finswimmer
Created July 12, 2018 09:02
Show Gist options
  • Save finswimmer/db3e90eb0ef79a8ddb4ce82383800cdc to your computer and use it in GitHub Desktop.
Save finswimmer/db3e90eb0ef79a8ddb4ce82383800cdc to your computer and use it in GitHub Desktop.
from Bio import AlignIO
def column(alignment):
    """Yield the characters of each alignment column, one column at a time.

    For every index ``i`` in ``range(alignment.get_alignment_length())`` a
    new list is yielded that holds ``record.seq[i]`` for each record
    obtained by iterating over ``alignment``, in iteration order.

    Args:
        alignment: An object that provides ``get_alignment_length()`` and
            iterates over records exposing an indexable ``seq`` attribute.

    Yields:
        list: The characters found at one column position, one entry per
        record.
    """
    for i in range(alignment.get_alignment_length()):
        # One fresh list per column so callers may keep or mutate it safely.
        yield [record.seq[i] for record in alignment]
def _write_columns(path, names, columns):
    """Write the given columns to ``path`` as one line per sequence.

    ``columns`` is a list of columns (each a list with one character per
    sequence). It is transposed back into rows, and every row is written as
    ``<name> <characters>`` followed by a newline. If ``columns`` is empty,
    the file is created but left empty.
    """
    with open(path, "w") as outfile:
        # zip(*columns) turns the column lists back into per-sequence rows;
        # pairing with names keeps each row next to its record ID.
        for name, row in zip(names, zip(*columns)):
            outfile.write(name + " " + "".join(row) + "\n")


# Read the phylip alignment; the context manager closes the input file
# once parsing is done instead of leaving the handle open.
with open("input.txt") as handle:
    alignment = AlignIO.read(handle, "phylip")

names = [record.id for record in alignment]  # IDs of the aligned records

list2 = []  # columns that contain "-"
list3 = []  # columns without "-"

# Sort every alignment column into list2 or list3 depending on whether
# it contains a "-" character.
for c in column(alignment):
    if "-" in c:
        list2.append(c)
    else:
        list3.append(c)

# Columns with "-" go to list2.txt, columns without "-" go to list3.txt.
_write_columns("list2.txt", names, list2)
_write_columns("list3.txt", names, list3)
@finswimmer
Copy link
Author

Answer to the Biostars question: https://www.biostars.org/p/326044

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment