-
-
Save meren/d2d046bb3f16429c95ba9cb4778b36f9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
# lousy script that works with the outputs of `anvi-export-collection` | |
# to reconstruct the fate of contigs for a given algorithm and bin and | |
# spit out some text to be visualized on https://app.rawgraphs.io/ | |
import sys | |
from collections import OrderedDict | |
import anvio.utils as u | |
from anvio.errors import ConfigError | |
def G(x):
    """Load one `anvi-export-collection` output file as a dictionary.

    `x` is the path to a headerless TAB-delimited file; its columns are
    named `split_name` and `bin_name` on load. The rest of this script
    indexes the result as `result[split_name]['bin_name']`.
    """
    return u.get_TAB_delimited_file_as_dictionary(x, no_header=True, column_names=["split_name", "bin_name"])


# crappy way to do this, indeed, but it will suffice for today.
# you need to put the output of anvi-export-collection for each
# binning algorithm into this dict with the matching filename.
#
# The OrderedDict is built from a list of pairs rather than a dict literal:
# a dict literal does not guarantee insertion order on older Pythons, which
# would scramble the column order of the output.
algorithms = OrderedDict([
    ('CONCOCT', G('EP-CONCOCT_1500.txt')),
    ('MAXBIN2', G('EP-MAXBIN2_1500.txt')),
    ('METABAT2', G('EP-METABAT2_1500.txt')),
    ('DASTOOL', G('EP-DASTOOL_1500.txt')),
])
def main(args):
    """Print a CSV table that follows the splits of one bin across all algorithms.

    Finds every split that `args.algorithm` placed in the bin `args.bin`,
    then prints a header line (`split` followed by every algorithm name)
    and one comma-separated row per split with the bin each algorithm
    assigned it to.

    Parameters:
        args: object with `algorithm` and `bin` attributes (the argparse
              namespace built at the bottom of this script).

    Raises:
        ConfigError: if the algorithm or bin name is missing, or if the
                     algorithm is not a key of `algorithms`.
    """
    algorithm = args.algorithm
    bin_name = args.bin

    # Without this check a missing flag falls through to the "unknown
    # algorithm" error below, which hides the real problem from the user.
    if algorithm is None or bin_name is None:
        raise ConfigError("You need to declare both an algorithm (--algorithm) and a bin name (--bin). "
                          "These are the algorithms we know: %s." % ', '.join(algorithms))

    if algorithm not in algorithms:
        raise ConfigError("That algorithm we don't know. This is what we know: %s." % ', '.join(algorithms))

    # Sorted so that the row order is reproducible from run to run.
    split_names = sorted(split_name
                         for split_name, entry in algorithms[algorithm].items()
                         if entry['bin_name'] == bin_name)

    print(','.join(['split'] + list(algorithms.keys())))

    for split_name in split_names:
        row = [split_name]
        for collection in algorithms.values():
            # Not every algorithm necessarily bins every split; use a
            # placeholder rather than dying with a KeyError.
            if split_name in collection:
                row.append(collection[split_name]['bin_name'])
            else:
                row.append('UNBINNED')
        print(','.join(row))
if __name__ == '__main__':
    import argparse

    # The text must be passed as `description`: the first positional
    # parameter of ArgumentParser is `prog`, so passing it positionally
    # replaces the program name in the usage line instead of describing it.
    # The '%s' placeholder is filled in with the known algorithm names.
    parser = argparse.ArgumentParser(
        description="Give this guy one of the algorithms (it knows all about '%s') and a bin name, "
                    "get back an input data for an alluvial diagram that shows which bins contain "
                    "the splits in that bin." % "', '".join(algorithms))

    # Both flags are mandatory; let argparse say so instead of failing later.
    parser.add_argument('--algorithm', required=True, help="Which algorithm?")
    parser.add_argument('--bin', required=True, help="Which bin name to start with?")

    args = parser.parse_args()

    try:
        main(args)
    except ConfigError as e:
        # stdout carries the CSV output (often redirected to a file), so the
        # error message goes to stderr.
        print(e, file=sys.stderr)
        sys.exit(-1)
If you can send me your files I can find out what is wrong with them. I'm sorry it's been frustrating.
Hi meren,
I finally figured out the correct format for alluvial illustrations (on "https://app.rawgraphs.io/").
You just need to generate a table with contig names in the first column and bin names in the remaining columns. Something like the following:
Contigs | maxbin | metabat | metawrap | concoct
c_000000000502_split_00001 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
c_000000000512_split_00001 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
c_000000000571_split_00001 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
c_000000001297_split_00001 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
c_000000001297_split_00002 | maxbin_bin32 | metabat_bin63 | mv_bin31 | concoct_bin95
.......
There are actually multiple ways for doing this job.
Appreciate this idea for using alluvial figures to compare binning results.
Thanks!
#Hello,
#I am having some trouble getting this script to run.
#I have modified the algorithms sections as follows
algorithms = OrderedDict({'ANVIO_CONCOCT': G('collection-Concoct.txt'),
'ANVIO_METABAT': G('collection-bbmap_Metabat2.txt'),
'MANUAL_METABAT': G('collection-manual_metabat_bins.txt'),
'DASTOOL': G('collection-dastool_bins.txt')})
#My files are located in the same folder as the script and were exported from anvio through anvi-export-collection.
collection-bbmap_Metabat2-info.txt collection-Concoct.txt collection-manual_metabat_bins-info.txt
collection-bbmap_Metabat2.txt collection-dastool_bins-info.txt collection-manual_metabat_bins.txt
collection-Concoct-info.txt collection-dastool_bins.txt
#I have been running the script as follows
python anvi-script-gen-alluvial.py
#I also had to add a line that imports anvio at the start of the script otherwise it was unable to find a module named anvio.utils.
import anvio
#Now the issue that I am running into is this:
Config Error: That algorithm we don't know. This is what we know: ANVIO_CONCOCT,
ANVIO_METABAT, MANUAL_METABAT, DASTOOL.
#So I guess it is getting stuck at that first if statement. I am not super familiar with Python, so any help would be greatly appreciated.
#Thanks !
#Claudia
#FIGURED IT OUT!
need to have --algorithm and --bin flags to run
for example
python anvi-script-gen-alluvial.py --algorithm ANVIO_CONCOCT --bin Bin_27
Hi meren,
Really appreciate your prompt reply.
I changed the names in the script, like this:
algorithms = OrderedDict({'CONCOCT': G('concoct.txt'),
'MAXBIN2': G('maxbin.txt'),
'METABAT2': G('metabat.txt'),
'METAWRAP': G('metawrap.txt')})
But I still get 'ConfigError'. Should I add the directory of the corresponding files? Right now the .txt files and the script are in the same folder.
Thank you!