Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
#!/usr/bin/env python
# lousy script that works with the outputs of `anvi-export-collection`
# to reconstruct the fate of contigs for a given algorithm and bin, and
# spit out some text to be visualized as an alluvial diagram
import sys
from collections import OrderedDict
import anvio.utils as u
from anvio.errors import ConfigError
def G(x):
    """Load one `anvi-export-collection` output file as a dictionary.

    `x` is the path to a headerless, TAB-delimited file. Its two columns are
    labelled "split_name" and "bin_name" when the file is handed to anvi'o.
    The rest of this script reads the result as
    ``result[split_name]['bin_name']``.
    """
    return u.get_TAB_delimited_file_as_dictionary(
        x, no_header=True, column_names=["split_name", "bin_name"])


# crappy way to do this, indeed, but it will suffice for today.
# you need to put the output of anvi-export-collection for each
# binning algorithm into this dict with the matching filename.
#
# NOTE: built from a list of pairs (not a dict literal) so that the order of
# the algorithms -- which is the column order of the output -- is guaranteed
# on every Python version.
algorithms = OrderedDict([
    ('CONCOCT', G('EP-CONCOCT_1500.txt')),
    ('MAXBIN2', G('EP-MAXBIN2_1500.txt')),
    ('METABAT2', G('EP-METABAT2_1500.txt')),
    ('DASTOOL', G('EP-DASTOOL_1500.txt')),
])
def main(args):
    """Print a CSV table tracing the splits of one bin across all algorithms.

    The splits that `args.algorithm` placed in `args.bin` are collected, then
    one CSV row per split is written to stdout, giving the bin that each
    algorithm in `algorithms` assigned to that split. The first row is a
    header: ``split`` followed by the algorithm names.

    Parameters:
        args: an object with `algorithm` (a key of `algorithms`) and `bin`
            (the bin name to start from) attributes.

    Raises:
        ConfigError: if `args.algorithm` is not a key of `algorithms`.
    """
    algorithm = args.algorithm
    bin_name = args.bin

    if algorithm not in algorithms:
        raise ConfigError("That algorithm we don't know. This is what we know: %s." % ', '.join(algorithms))

    # Collect every split the chosen algorithm put into the requested bin.
    # Sorted so that the output is reproducible from run to run.
    split_names = sorted(
        split_name
        for split_name, entry in algorithms[algorithm].items()
        if entry['bin_name'] == bin_name
    )

    # A split binned by the chosen algorithm is not necessarily present in
    # the other algorithms' files; give it a placeholder instead of crashing.
    # NOTE(review): 'UNBINNED' is believed to match the bin name used by
    # `anvi-export-collection --include-unbinned` -- confirm.
    unbinned = 'UNBINNED'

    print(','.join(['split'] + list(algorithms)))
    for split_name in split_names:
        row = [split_name]
        for collection in algorithms.values():
            if split_name in collection:
                row.append(collection[split_name]['bin_name'])
            else:
                row.append(unbinned)
        print(','.join(row))
if __name__ == '__main__':
    import argparse

    # The text must go in `description=`: the first positional parameter of
    # ArgumentParser is `prog`, which would put the whole sentence in the
    # usage line. The known algorithm names are interpolated into the text.
    parser = argparse.ArgumentParser(
        description="Give this guy one of the algorithms (it knows all about '%s') and a bin name, "
                    "get back an input data for an alluvial diagram that shows which bins contain "
                    "the splits in that bin." % ', '.join(algorithms))
    parser.add_argument('--algorithm', required=True, help="Which algorithm?")
    parser.add_argument('--bin', required=True, help="Which bin name to start with?")
    args = parser.parse_args()

    try:
        main(args)
    except ConfigError as e:
        # stdout carries the CSV, so report the problem on stderr and signal
        # failure through the exit status.
        print(e, file=sys.stderr)
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.