Skip to content

Instantly share code, notes, and snippets.

@jurgjn
Last active July 26, 2023 19:53
Show Gist options
  • Save jurgjn/cc559d2b7b8bc91f2e09 to your computer and use it in GitHub Desktop.
Save jurgjn/cc559d2b7b8bc91f2e09 to your computer and use it in GitHub Desktop.
BaseSpaceRunDownloader
#!/usr/bin/python
# Force to use (system) python2
#!/usr/bin/env python
description = """\
Usage: BaseSpaceRunDownloader_v2.py -r <ProjectName> -a <AccessToken>
This is a modified version of the BaseSpace Python Run Downloader from Illumina. It takes
the project name (instead of the run id) and downloads base calls as fastq files (instead of
the image/intensity data).
It also checks that the sizes of the downloaded files match with what's reported by the
BaseSpace REST API. Downloaded files are organised by project/sample/file, just like in the
BaseSpace Downloader GUI app.
There's rudimentary support for resuming interrupted transfers. The script does not download
a file if there's a file with matching size already on the disk. Partially downloaded files
where the size of the local file does not match the BaseSpace REST API are detected as size
mismatches, but need to be examined/removed manually.
References
Original Illumina code: https://support.basespace.illumina.com/knowledgebase/articles/403618-python-run-downloader
BaseSpace REST API: https://developer.basespace.illumina.com/docs/content/documentation/rest-api/api-reference
Heng Li's notes on BaseSpace: https://gist.github.com/lh3/54f535b11a9ee5d3be8e
"""
from urllib2 import Request, urlopen, URLError
import json
import math
import sys
import os
import socket
import optparse
import pprint
import itertools
def arg_parser():
cwd_dir = os.getcwd()
parser = optparse.OptionParser()
parser.add_option( '-r', dest='runid', help='Run ID: required')
parser.add_option( '-a', dest='accesstoken', help='Access Token: required')
( options, args ) = parser.parse_args()
try:
if options.runid == None:
raise Exception
if options.accesstoken == None:
raise Exception
except Exception:
print description
sys.exit()
return options
def restrequest(rawrequest):
request = Request(rawrequest)
try:
response = urlopen(request)
json_string = response.read()
json_obj = json.loads(json_string)
except URLError, e:
print 'Got an error code:', e
sys.exit()
return json_obj
def downloadrestrequest(rawrequest,path):
#dirname = RunID + os.sep + os.path.dirname(path)
dirname = os.path.dirname(path)
if dirname != '':
if not os.path.isdir(dirname):
os.makedirs(dirname)
request = (rawrequest)
#outfile = open(RunID + os.sep + path,'wb')
outfile = open(path,'wb')
try:
response = urlopen(request,timeout=1)
outfile.write(response.read())
outfile.close()
except URLError, e:
print 'Got an error code:', e
outfile.close()
downloadrestrequest(rawrequest,path)
except socket.error:
print 'Got a socket error: retrying'
outfile.close()
downloadrestrequest(rawrequest,path)
options = arg_parser()
RunID = options.runid
AccessToken = options.accesstoken
# Given project name (stored in RunID), find the project ID
request = 'http://api.basespace.illumina.com/v1pre3/users/current/projects?access_token=%s&limit=1000' % (AccessToken,)
json_obj = restrequest(request)
for project in json_obj['Response']['Items']:
if project['Name'] == RunID:
(ProjectName, ProjectID) = (project['Name'], project['Id'])
ProjectDir = "%(ProjectName)s-%(ProjectID)s" % locals()
print ProjectDir
# Get a list of sample IDs in a given project (yes, this barfs with >1000 samples)
request = 'http://api.basespace.illumina.com/v1pre3/projects/%s/samples?access_token=%s&limit=1000' %(ProjectID,AccessToken)
json_obj = restrequest(request)
for sample in itertools.islice(json_obj['Response']['Items'], None):
(SampleName, SampleID) = (sample['Name'], sample['Id'])
SampleDir = os.path.join(ProjectDir, "%(SampleName)s-%(SampleID)s" % locals())
print SampleDir
sample_request = 'http://api.basespace.illumina.com/v1pre3/samples/%s/files?access_token=%s' %(sample['Id'],AccessToken)
sample_json_obj = restrequest(sample_request)
for sample_file in itertools.islice(sample_json_obj['Response']['Items'], None):
FilePath = os.path.join(SampleDir, sample_file['Path'],)
print FilePath,
if not os.path.isfile(FilePath):
file_request = 'http://api.basespace.illumina.com/%s/content?access_token=%s'%(sample_file['Href'],AccessToken)
downloadrestrequest(file_request, FilePath)
if sample_file['Size'] == os.path.getsize(FilePath):
print 'size_ok'
else:
print 'size_mismatch'
sys.exit('File sizes differ between disk and BaseSpace for %(FilePath)s' % locals())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment