Skip to content

Instantly share code, notes, and snippets.

Created June 13, 2015 05:09
#!/usr/bin/python
"""
0. This script demonstrates how to download the data files of the project named "project_name" from your BaseSpace account.
1. Get your access token from here by creating an application: https://developer.basespace.illumina.com/dashboard
The access token is a 32-character string.
2. For demonstration purposes, this script assumes you have no more than 1024 projects in your account, because every request uses "Offset=0&Limit=1024".
If you have more than 1024 projects, use a loop that advances "Offset" by "Limit" for each page.
3. No error handling. DIY if you want.
"""
import sys
import os
import urllib2
import json
import multiprocessing
# BaseSpace API access token (placeholder -- replace it with your own token).
ACCESS_TOKEN = "thisisa32bitlongstringasyouraccesstoken"
# HTTPS, because the access token is sent in the query string of every request
# and must not travel in clear text.
BASE_URI = 'https://api.basespace.illumina.com/v1pre3/'
# Query string appended to every collection request: first page only
# (Offset=0), at most 1024 items, authenticated with ACCESS_TOKEN.
RESOURCE_COLLECTION_REQUESTS = "?Offset=0&Limit=1024&access_token={}".format(ACCESS_TOKEN)
# Path (relative to BASE_URI) that lists the current user's projects.
PROJECTS_GET_URI = 'users/current/projects'
# Path that lists the samples of a project; {Id} is filled with the project Id.
SAMPLES_GET_URI = 'projects/{Id}/samples'
# Path that lists the files of a sample; {Id} is filled with the sample Id.
FILES_GET_URI = 'samples/{Id}/files'
def get_request(req):
    """Fetch the URL ``req`` and return its body decoded from JSON.

    Args:
        req: URL string to request.

    Returns:
        The Python object produced by ``json.loads`` on the response body.

    Raises:
        urllib2.URLError: if the request cannot be completed.
        ValueError: if the response body is not valid JSON.
    """
    request = urllib2.Request(req)
    response = urllib2.urlopen(request)
    try:
        return json.loads(response.read())
    finally:
        # urllib2 responses are not context managers on Python 2, so the
        # connection is released explicitly, even when decoding fails.
        response.close()
def list_items(req):
    """Return an iterator over the entries stored at req['Response']['Items'].

    Args:
        req: decoded API reply (a mapping) as returned by ``get_request``.

    Raises:
        KeyError: if 'Response' or 'Items' is missing from the reply.
    """
    response_body = req['Response']
    entries = response_body['Items']
    return iter(entries)
def download_file(tuple_uri):
    """Download one file and store it below the project directory.

    Takes a single sequence argument so it can be used directly with
    ``Pool.map``.

    Args:
        tuple_uri: sequence ``(request_uri, project_name, file_name)``.
            ``request_uri`` is the URL to fetch; the content is written to
            ``project_name/file_name`` (made absolute against the current
            working directory). ``file_name`` may contain sub-directories.

    Raises:
        OSError: if the output directory cannot be created.
        urllib2.URLError: if the download cannot be started.
    """
    import errno
    import shutil

    request_uri, project_name, file_name = tuple_uri
    output_file_name = os.path.abspath(os.path.join(project_name, file_name))
    output_dir_name = os.path.dirname(output_file_name)
    try:
        os.makedirs(output_dir_name)
    except OSError as err:
        # Several pool workers may try to create the same directory at once;
        # "already exists" is fine. Any other failure would make the open()
        # below fail anyway, so surface the real cause here.
        if err.errno != errno.EEXIST or not os.path.isdir(output_dir_name):
            raise
    print('%s ---> %s' % (file_name, output_file_name))
    # Open the connection first so a failed request does not leave an empty
    # output file behind.
    response = urllib2.urlopen(request_uri, timeout=3)
    try:
        with open(output_file_name, 'wb') as outfile:
            # Stream in chunks instead of holding the whole file in memory.
            shutil.copyfileobj(response, outfile)
    finally:
        response.close()
def download_project(project_name="project1"):
    """Download every file of the project called ``project_name``.

    Walks projects -> samples -> files through the API, builds one download
    job per file, and runs the jobs with ``download_file`` on a process pool
    sized to the CPU count. Only the first project whose name matches is
    downloaded. If no project matches, or the project has no files, nothing
    is downloaded.

    Each listing request fetches only the first page defined by
    RESOURCE_COLLECTION_REQUESTS.

    Args:
        project_name: exact value of the project's 'Name' field.
    """
    files_uris = []
    request_uri = '{0}{1}{2}'.format(BASE_URI, PROJECTS_GET_URI, RESOURCE_COLLECTION_REQUESTS)
    for project in list_items(get_request(request_uri)):
        if project['Name'] != project_name:
            continue
        request_uri = '{0}{1}{2}'.format(
            BASE_URI, SAMPLES_GET_URI.format(Id=project['Id']), RESOURCE_COLLECTION_REQUESTS)
        for sample in list_items(get_request(request_uri)):
            request_uri = '{0}{1}{2}'.format(
                BASE_URI, FILES_GET_URI.format(Id=sample['Id']), RESOURCE_COLLECTION_REQUESTS)
            for file_entry in list_items(get_request(request_uri)):
                files_uris.append([
                    # HTTPS: the access token is part of the URL.
                    'https://api.basespace.illumina.com/{0}/content?access_token={1}'.format(
                        file_entry['Href'], ACCESS_TOKEN),
                    project_name,
                    file_entry['Path'],
                ])
        # Stop after the first project with a matching name.
        break
    if not files_uris:
        # Nothing to fetch; do not spawn worker processes for an empty job list.
        return
    ppool = multiprocessing.Pool(multiprocessing.cpu_count())
    try:
        ppool.map(download_file, files_uris)
    finally:
        # close() followed by join() reaps the workers even when a download
        # raised inside map().
        ppool.close()
        ppool.join()
if __name__ == '__main__':
    # The project name is the single required command-line argument; exit
    # with a usage message instead of an IndexError traceback when missing.
    if len(sys.argv) < 2:
        sys.exit('Usage: {0} <project_name>'.format(sys.argv[0]))
    download_project(project_name=sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment