Skip to content

Instantly share code, notes, and snippets.

@sminot
Last active July 11, 2016 18:23
Show Gist options
  • Save sminot/1ff7e5242ef2756405f2 to your computer and use it in GitHub Desktop.
Save sminot/1ff7e5242ef2756405f2 to your computer and use it in GitHub Desktop.
Download all results
#!/usr/bin/python
"""
One Codex CSV Download Script.
A simple Python 2/3 script with a single dependency (requests) that downloads
One Codex analysis results to CSV files and, optionally, the raw read-level results.
"""
from __future__ import print_function

import argparse
import csv
import os
import time

import requests
def api_wrapper(endpoint, api_key):
    """Fetch one endpoint of the One Codex v0 API and return the decoded JSON.

    Args:
        endpoint: Path below ``https://app.onecodex.com/api/v0/``,
            e.g. ``"analyses"``.
        api_key: API key, sent as the HTTP basic-auth user name with an
            empty password.

    Returns:
        The JSON-decoded body of the response.

    Raises:
        requests.HTTPError: If the server answers with anything other than
            HTTP 200. Previously such responses made the function return
            ``None``, which only surfaced later as an unrelated ``TypeError``
            in the caller.
    """
    url = "https://app.onecodex.com/api/v0/{}".format(endpoint)
    r = requests.get(url, auth=(api_key, ""))
    # Covers 4xx/5xx with the message requests builds for them.
    r.raise_for_status()
    if r.status_code != 200:
        # Any other non-200 answer was also treated as a failure before.
        raise requests.HTTPError(
            "Unexpected HTTP status {} for {}".format(r.status_code, url),
            response=r)
    return r.json()
def _fetch_reads(api_key, analysis_id, path):
    """Stream the raw read-level results of one analysis into ``path``."""
    url = 'https://app.onecodex.com/api/v0/analyses/{}/raw'.format(analysis_id)
    partial = path + '.part'
    r = requests.get(url, auth=(api_key, ''), stream=True)
    try:
        # Without this check an error body would be saved as the .gz file.
        r.raise_for_status()
        with open(partial, 'wb') as fd:
            for chunk in r.iter_content(64 * 1024):
                fd.write(chunk)
    finally:
        r.close()
    # Rename only after a complete download, so an interrupted run never
    # leaves a truncated file that a later run would take for finished.
    os.rename(partial, path)


def download_results(api_key, folder, reads=False, retry=False):
    """Download the results of every successful analysis into ``folder``.

    For each analysis reported as 'Success' a CSV named
    ``<upload date>.<sample filename>.<analysis id>.csv`` is written; with
    ``reads`` a matching ``.reads.tsv.gz`` file with the raw read-level
    results is fetched as well. Files that already exist in ``folder`` are
    not downloaded again. Analyses whose status is neither 'Pending' nor
    'Success' are ignored.

    Args:
        api_key: API key used for all requests.
        folder: Output directory; created (including parents) if missing.
        reads: Also download the raw read-level results.
        retry: While any analysis is still 'Pending', list the analyses
            again every 60 seconds until none is pending.

    Raises:
        requests.HTTPError: If the read-level download is answered with an
            HTTP error status.
    """
    if not os.path.exists(folder):
        os.makedirs(folder)

    # Ids of the analyses already handled during this run. Keyed on the
    # analysis (not the sample) because every analysis gets its own files.
    downloaded = set()
    pending = True
    while pending:
        pending = False
        for analysis, analysis_status in all_analyses(api_key):
            if analysis_status == 'Pending':
                print("Analysis for {} is pending".format(analysis['sample_filename']))
                if retry:
                    pending = True
                continue
            if analysis_status != 'Success' or analysis['id'] in downloaded:
                continue

            print("Getting sample information ({})".format(analysis['sample_id']))
            sample_information = api_wrapper('samples/{}'.format(analysis['sample_id']), api_key)
            # Keep only the part of the upload timestamp before the first space.
            upload_date = sample_information['upload_date'].split(' ')[0]

            filename = "{}.{}.{}.csv".format(upload_date, analysis['sample_filename'], analysis['id'])
            fpo = os.path.join(folder, filename)
            if not os.path.exists(fpo):
                analysis['results'] = api_wrapper(
                    'analyses/{}/extended_table'.format(analysis['id']), api_key)
                partial = fpo + '.part'
                with open(partial, 'w') as fo:
                    save_result(analysis, fo)
                # Publish the CSV only once it has been written completely.
                os.rename(partial, fpo)

            if reads:
                # Download the raw read-level results
                filename = "{}.{}.{}.reads.tsv.gz".format(
                    upload_date, analysis['sample_filename'], analysis['id'])
                fpo = os.path.join(folder, filename)
                if not os.path.exists(fpo):
                    print("Fetching read-level results ({})".format(fpo))
                    _fetch_reads(api_key, analysis['id'], fpo)

            downloaded.add(analysis['id'])

        if retry and pending:
            print("Pending analyses remain, will check again in 60 seconds.")
            time.sleep(60)
def all_analyses(api_key):
    """Yield ``(analysis, status)`` for each analysis run against the One Codex Database.

    The analyses are read from the ``analyses`` endpoint; entries whose
    ``reference_name`` is anything else are skipped. ``status`` is the
    entry's ``analysis_status`` value.
    """
    for record in api_wrapper("analyses", api_key):
        # Only save results from OCDB
        if record['reference_name'] == 'One Codex Database':
            yield record, record['analysis_status']
def save_result(analysis, fo, folder=None):
    """Write the results of one analysis as CSV to an open file handle.

    A header row is written first, followed by one row per entry of
    ``analysis['results']``.

    Args:
        analysis: Mapping with ``sample_filename``, ``id`` and ``results``;
            each result provides ``tax_id``, ``name``, ``rank``,
            ``pct_of_total``, ``readcount``, ``readcount_w_children`` and
            optionally ``abundance`` (written as 0 when absent).
        fo: Text-mode file object to write to.
        folder: Unused; kept so existing callers keep working.
    """
    print("Saving results for {} ({})".format(analysis['sample_filename'], analysis['id']))
    # The csv module quotes fields containing commas or quotes (organism
    # names can). '\n' keeps the line ending this function always produced.
    writer = csv.writer(fo, lineterminator='\n')
    # Nine columns: the former eight-slot format string silently dropped
    # the last one ("Readcount with Children") from every line.
    writer.writerow([
        'Filename', 'Analysis ID', 'NCBI Tax ID', 'Organism Name',
        'Rank', 'Percent of Total', 'Abundance',
        'Readcount', 'Readcount with Children'])
    for organism in analysis['results']:
        row = (
            analysis['sample_filename'], analysis['id'], organism['tax_id'], organism['name'],
            organism['rank'], organism['pct_of_total'], organism.get('abundance', 0),
            organism['readcount'], organism['readcount_w_children'])
        # Render each value with '{}' so numbers and None look exactly as before.
        writer.writerow(['{}'.format(value) for value in row])
def main(argv=None):
    """Parse the command line and download the results.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.
    """
    # The description used to claim the results go "to a single file",
    # which contradicted the per-analysis files written into ``folder``.
    parser = argparse.ArgumentParser(
        description="Download the results of each One Codex analysis "
                    "to individual CSV files in a folder")
    parser.add_argument("api_key", type=str,
                        help="API key")
    parser.add_argument("folder", type=str,
                        help="Write results to individual files in this folder")
    parser.add_argument("--reads", action='store_true',
                        help="Also write out read-level results")
    parser.add_argument("--retry", action='store_true',
                        help="Keep retrying every 60 seconds until all analyses are complete")
    args = parser.parse_args(argv)
    download_results(args.api_key, args.folder, reads=args.reads, retry=args.retry)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment