Skip to content

Instantly share code, notes, and snippets.

@marekborowiec
Last active March 29, 2016 17:23
Show Gist options
  • Save marekborowiec/05fca83396213391af840d48f7b2aa03 to your computer and use it in GitHub Desktop.
Save marekborowiec/05fca83396213391af840d48f7b2aa03 to your computer and use it in GitHub Desktop.
Make a FASTA file with one line per taxon name and one line per sequence (non-interleaved) using AMAS.
#! /usr/bin/env python3
from amas import AMAS
from glob import glob
# Convert every FASTA alignment in the current working directory into a
# non-interleaved FASTA file: one header line per taxon followed by the
# complete sequence on a single line.

# glob() returns matches in arbitrary order; sort so that the numbering of
# the output files is reproducible between runs.
# NOTE(review): this assumes AMAS returns parsed alignments in the same order
# as `in_files` -- confirm against the AMAS version in use.
in_fs = sorted(glob('*.fasta'))
# load the input files as DNA alignments in FASTA format, using one core
meta_aln = AMAS.MetaAlignment(in_files=in_fs, data_type="dna", in_format="fasta", cores=1)
# extract {taxon : seq} dictionaries, one per input alignment
aln_dicts = meta_aln.get_parsed_alignments()
# write each alignment to its own output file, numbered from 1
for index, alignment in enumerate(aln_dicts, start=1):
    fn = 'non-interleaved{}.fas'.format(index)
    # the context manager closes the file even if a write fails part-way
    with open(fn, "w") as out_f:
        # each record takes exactly two lines: '>taxon' and the full sequence
        for taxon, sequence in alignment.items():
            out_f.write('>{}\n{}\n'.format(taxon, sequence))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment