Created
December 28, 2019 02:22
-
-
Save meren/d2d046bb3f16429c95ba9cb4778b36f9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# lousy script that works with the outputs of `anvi-export-collection`
# to reconstruct the fate of contigs for a given algorithm and bin and
# spit out some text to be visualized on https://app.rawgraphs.io/
import sys | |
from collections import OrderedDict | |
import anvio.utils as u | |
from anvio.errors import ConfigError | |
def G(x):
    """Read the headerless, TAB-delimited file `x` into a dictionary.

    The two columns of the file are labelled `split_name` and `bin_name`;
    the result is whatever `u.get_TAB_delimited_file_as_dictionary` returns
    for those settings.
    """
    return u.get_TAB_delimited_file_as_dictionary(x, no_header=True, column_names=["split_name", "bin_name"])


# Not an elegant setup, but good enough for now: every binning algorithm
# is registered here by hand, paired with the file that holds the
# `anvi-export-collection` output for it. Edit the names and file paths
# below to match your own data.
algorithms = OrderedDict([
    ('CONCOCT', G('EP-CONCOCT_1500.txt')),
    ('MAXBIN2', G('EP-MAXBIN2_1500.txt')),
    ('METABAT2', G('EP-METABAT2_1500.txt')),
    ('DASTOOL', G('EP-DASTOOL_1500.txt')),
])
def main(args):
    """Print a CSV table that follows the splits of one bin across all algorithms.

    The first output line is a header (`split` followed by every key of the
    module-level `algorithms` dictionary). Each following line describes one
    split found in bin `args.bin` of `args.algorithm`, and lists the bin that
    split belongs to in every known algorithm, in header order.

    Parameters
    ----------
    args : argparse.Namespace (or any object with the same attributes)
        `args.algorithm` is the key in `algorithms` to start from, and
        `args.bin` is the bin name to look up within that algorithm.

    Raises
    ------
    ConfigError
        If `args.algorithm` is not a key of `algorithms`, if `args.bin` is
        missing, or if no split of the chosen algorithm is in that bin.
    """
    algorithm = args.algorithm
    bin_name = args.bin

    if algorithm not in algorithms:
        raise ConfigError("That algorithm we don't know. This is what we know: %s." % ', '.join(algorithms))

    if bin_name is None:
        raise ConfigError("You need to declare a bin name (via `--bin`) to start with.")

    # sorted, so that the order of the output rows is the same on every run
    # instead of following the arbitrary iteration order of a set.
    split_names = sorted(split_name for split_name, entry in algorithms[algorithm].items()
                         if entry['bin_name'] == bin_name)

    if not split_names:
        raise ConfigError("There are no splits in a bin named '%s' for the algorithm '%s'." % (bin_name, algorithm))

    print(','.join(['split'] + list(algorithms)))

    for split_name in split_names:
        row = [split_name]
        for collection in algorithms.values():
            # a split of the selected bin may be absent from another
            # algorithm's collection; report it with a placeholder rather
            # than dying with a KeyError.
            entry = collection.get(split_name)
            row.append(entry['bin_name'] if entry else 'UNBINNED')
        print(','.join(row))
if __name__ == '__main__':
    import argparse

    # The text goes to `description=`: the first positional parameter of
    # ArgumentParser is `prog`, which would otherwise show this whole
    # sentence as the program name in the usage line. The '%s' placeholder
    # is filled with the algorithm names this script currently knows about.
    parser = argparse.ArgumentParser(
        description="Give this guy one of the algorithms (it knows all about '%s') and a bin name, "
                    "get back an input data for an alluvial diagram that shows which bins contain "
                    "the splits in that bin." % ', '.join(algorithms))

    # Both flags are mandatory: without them there is nothing to look up,
    # so let argparse say so with a usage message instead of failing later
    # with an unrelated-looking error.
    parser.add_argument('--algorithm', required=True, help="Which algorithm?")
    parser.add_argument('--bin', required=True, help="Which bin name to start with?")

    args = parser.parse_args()

    try:
        main(args)
    except ConfigError as e:
        # show the error message and leave with a non-zero exit status
        print(e)
        sys.exit(-1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
#Hello,
#I am having some trouble getting this script to run.
#I have modified the algorithms sections as follows
algorithms = OrderedDict({'ANVIO_CONCOCT': G('collection-Concoct.txt'),
'ANVIO_METABAT': G('collection-bbmap_Metabat2.txt'),
'MANUAL_METABAT': G('collection-manual_metabat_bins.txt'),
'DASTOOL': G('collection-dastool_bins.txt')})
#My files are located in the same folder as the script and were exported from anvio through anvi-export-collection.
collection-bbmap_Metabat2-info.txt collection-Concoct.txt collection-manual_metabat_bins-info.txt
collection-bbmap_Metabat2.txt collection-dastool_bins-info.txt collection-manual_metabat_bins.txt
collection-Concoct-info.txt collection-dastool_bins.txt
#I have been running the script as follows
python anvi-script-gen-alluvial.py
#I also had to add a line that imports anvio at the start of the script otherwise it was unable to find a module named anvio.utils.
import anvio
#Now the issue that I am running into is this:
Config Error: That algorithm we don't know. This is what we know: ANVIO_CONCOCT,
ANVIO_METABAT, MANUAL_METABAT, DASTOOL.
#So I guess it is getting stuck at that first if statement. I am not super familiar with Python, so any help would be greatly appreciated.
#Thanks !
#Claudia
#FIGURED IT OUT!
You need to pass both the --algorithm and --bin flags to run the script,
for example:
python anvi-script-gen-alluvial.py --algorithm ANVIO_CONCOCT --bin Bin_27