#!/usr/bin/env python
# lousy script that works with the outputs of `anvi-export-collection`
# to reconstruct the fate of contigs for a given algorithm and bin and
# spit out some text to be visualized as an alluvial diagram
import sys
from collections import OrderedDict
import anvio.utils as u
from anvio.errors import ConfigError
def G(x):
    """Load one exported collection file as a dictionary.

    `x` is the path to a header-less, TAB-delimited file whose two columns
    are interpreted as "split_name" and "bin_name" (in that order). The
    parsing itself is delegated to
    `anvio.utils.get_TAB_delimited_file_as_dictionary`, and whatever that
    function returns is returned unchanged. The rest of this script indexes
    the result as `result[split_name]['bin_name']`.
    """
    return u.get_TAB_delimited_file_as_dictionary(x, no_header=True, column_names=["split_name", "bin_name"])
# Hard-coded registry of binning results: not elegant, but good enough for now.
# Each key is the algorithm name accepted on the command line, and each value
# is the parsed `anvi-export-collection` output for that algorithm. Edit the
# names and file names below to match your own exported collections. The
# order of the entries is the order of the columns in the output.
algorithms = OrderedDict([
    ('CONCOCT', G('EP-CONCOCT_1500.txt')),
    ('MAXBIN2', G('EP-MAXBIN2_1500.txt')),
    ('METABAT2', G('EP-METABAT2_1500.txt')),
    ('DASTOOL', G('EP-DASTOOL_1500.txt')),
])
def main(args):
    """Print, as CSV, where each split of one bin ends up in every algorithm.

    Parameters:
        args: an object with two attributes, `algorithm` (a key of the
            module-level `algorithms` dictionary) and `bin` (the name of a
            bin in that algorithm's collection).

    Output (written to stdout):
        A header line `split,<algorithm 1>,<algorithm 2>,...` followed by one
        line per split found in the requested bin, listing the bin that each
        algorithm assigned to that split. If no split belongs to the
        requested bin, only the header is printed.

    Raises:
        ConfigError: if `args.algorithm` is not a key of `algorithms`.
    """
    algorithm = args.algorithm
    bin_name = args.bin

    if algorithm not in algorithms:
        raise ConfigError("That algorithm we don't know. This is what we know: %s." % ', '.join(algorithms))

    # Collect every split that the chosen algorithm placed in the chosen bin.
    chosen_collection = algorithms[algorithm]
    split_names = set()
    for split_name in chosen_collection:
        if chosen_collection[split_name]['bin_name'] == bin_name:
            split_names.add(split_name)

    # The header is printed exactly once, regardless of how many splits match.
    print(','.join(['split'] + list(algorithms.keys())))

    # Sorted so that the output is reproducible between runs (set iteration
    # order is arbitrary).
    for split_name in sorted(split_names):
        row = [split_name]
        for other_algorithm in algorithms:
            # A split binned by the chosen algorithm is not necessarily
            # present in every other collection; report those as UNBINNED
            # instead of dying with a KeyError.
            entry = algorithms[other_algorithm].get(split_name)
            row.append(entry['bin_name'] if entry is not None else 'UNBINNED')
        print(','.join(row))
if __name__ == '__main__':
    import argparse

    # The text has to be passed as `description`: the first positional
    # parameter of ArgumentParser is `prog`, not the description. The '%s'
    # placeholder is filled with the algorithm names this script knows about.
    parser = argparse.ArgumentParser(
        description="Give this guy one of the algorithms (it knows all about '%s') and a bin name, "
                    "get back an input data for an alluvial diagram that shows which bins contain "
                    "the splits in that bin.." % ', '.join(algorithms))

    # Both flags are mandatory; without them there is nothing to look up, so
    # let argparse report the missing flag instead of failing later with a
    # confusing "That algorithm we don't know" error.
    parser.add_argument('--algorithm', required=True, help="Which algorithm?")
    parser.add_argument('--bin', required=True, help="Which bin name to start with?")

    args = parser.parse_args()

    try:
        main(args)
    except ConfigError as e:
        # Keep stdout clean (it carries the CSV output) and signal failure
        # through the exit status.
        print(e, file=sys.stderr)
        sys.exit(1)
newmacuser commented Apr 3, 2020

How can we use this script? I tried running it in the collection export folder, but it showed this error: ConfigError("That algorithm we don't know. This is what we know: %s." % ', '.join(algorithms))

meren commented Apr 3, 2020

You need to update the `algorithms` dictionary with the algorithm name and the corresponding file for it.

newmacuser commented Apr 3, 2020

Hi meren,

Really appreciate your prompt reply.

I changed the names in the script, like this:
algorithms = OrderedDict({'CONCOCT': G('concoct.txt'),
'MAXBIN2': G('maxbin.txt'),
'METABAT2': G('metabat.txt'),
'METAWRAP': G('metawrap.txt')})

But I still get a 'ConfigError'. Should I add the directory of the corresponding files? Right now the .txt files and the script are in the same folder.

Thank you!

meren commented Apr 3, 2020

If you can send me your files I can find out what is wrong with them. I'm sorry it's been frustrating.

newmacuser commented Apr 3, 2020

Hi meren,

I finally figured out the correct format for alluvial illustrations (on "").

Just need to generate a table with contig names on first column and bin names on the rest of columns. Something like the following:
Contigs | maxbin | metabat | metawrap | concoct
c_000000000502_split_00001 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
c_000000000512_split_00001 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
c_000000000571_split_00001 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
c_000000001297_split_00001 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
c_000000001297_split_00002 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
There are actually multiple ways for doing this job.

Appreciate this idea for using alluvial figures to compare binning results.


cwood15 commented May 5, 2022


#I am having some trouble getting this script to run.

#I have modified the algorithms sections as follows

algorithms = OrderedDict({'ANVIO_CONCOCT': G('collection-Concoct.txt'),
'ANVIO_METABAT': G('collection-bbmap_Metabat2.txt'),
'MANUAL_METABAT': G('collection-manual_metabat_bins.txt'),
'DASTOOL': G('collection-dastool_bins.txt')})

#My files are located in the same folder as the script and were exported from anvio through anvi-export-collections.
collection-bbmap_Metabat2-info.txt collection-Concoct.txt collection-manual_metabat_bins-info.txt
collection-bbmap_Metabat2.txt collection-dastool_bins-info.txt collection-manual_metabat_bins.txt
collection-Concoct-info.txt collection-dastool_bins.txt

#I have been running the script as follows

#I also had to add a line that imports anvio at the start of the script otherwise it was unable to find a module named anvio.utils.
import anvio

#Now the issue that I am running into is this:
Config Error: That algorithm we don't know. This is what we know: ANVIO_CONCOCT,

#So I guess it is getting stuck at that first if statement. I am not super familiar with Python, so any help would be greatly appreciated.

#Thanks !

You need to pass both the --algorithm and --bin flags to run the script.

For example:
python <script_name>.py --algorithm ANVIO_CONCOCT --bin Bin_27

