Skip to content

Instantly share code, notes, and snippets.

@eco32i
Created July 3, 2015 16:53
Show Gist options
  • Save eco32i/7648fcb8e6295a63386a to your computer and use it in GitHub Desktop.
Save eco32i/7648fcb8e6295a63386a to your computer and use it in GitHub Desktop.
Download .fastq files from basespace
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download run data from `basespace`"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# BaseSpace REST endpoints. The access token is sent in the query string,\n",
"# so the requests go over HTTPS rather than plain HTTP.\n",
"sample_base_url = 'https://api.basespace.illumina.com/v1pre3/samples'\n",
"run_base_url = 'https://api.basespace.illumina.com/v1pre3/runs'\n",
"# Replace the placeholder locally; do not commit a real token with the notebook.\n",
"access_token = '<your access token here>'\n",
"\n",
"# Fill in your sample ids and run id here. They are defined (empty) so that\n",
"# this cell and the ones below run on a fresh kernel instead of raising NameError.\n",
"samples = []\n",
"run_id = ''\n",
"\n",
"# URL that lists the files of the run identified by run_id\n",
"request = '{0}/{1}/files?access_token={2}'.format(run_base_url, run_id, access_token)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from __future__ import division, print_function\n",
"from urllib2 import Request, urlopen, URLError\n",
"from itertools import izip\n",
"import json\n",
"import math\n",
"import sys\n",
"import os\n",
"import socket"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def rest_request(raw_request):\n",
"    \"\"\"Fetch ``raw_request`` and return the decoded JSON payload.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    raw_request : str\n",
"        Complete request URL, query string included.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The object produced by ``json.loads`` on the response body, or ``None``\n",
"    when ``urlopen`` raises ``URLError`` (the error is printed, not raised).\n",
"    \"\"\"\n",
"    request = Request(raw_request)\n",
"    try:\n",
"        response = urlopen(request)\n",
"        try:\n",
"            json_string = response.read()\n",
"        finally:\n",
"            # always release the connection, even if reading fails\n",
"            response.close()\n",
"        json_obj = json.loads(json_string)\n",
"    except URLError as e:\n",
"        print('Error retrieving {0}. Got an error code: {1}'.format(raw_request, e))\n",
"        json_obj = None\n",
"    return json_obj\n",
"\n",
"\n",
"def download_rest_request(raw_request, path, datadir='../data', timeout=1, max_retries=10):\n",
"    \"\"\"Download the content behind ``raw_request`` into ``datadir/path``.\n",
"\n",
"    The response is streamed to disk in chunks, so the whole file never has\n",
"    to fit in memory. Socket errors are retried in a loop, always against the\n",
"    same ``datadir``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    raw_request : str\n",
"        Complete request URL, query string included.\n",
"    path : str\n",
"        Destination path relative to ``datadir``; missing directories are created.\n",
"    datadir : str\n",
"        Root directory for downloads.\n",
"    timeout : number\n",
"        Timeout in seconds handed to ``urlopen``.\n",
"    max_retries : int\n",
"        How many times to retry after a socket error before giving up.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None. A ``URLError`` is printed and ends the download without a retry;\n",
"    in that case no file is created.\n",
"    \"\"\"\n",
"    dirname = os.path.join(datadir, os.path.dirname(path))\n",
"\n",
"    if not os.path.isdir(dirname):\n",
"        os.makedirs(dirname)\n",
"\n",
"    target = os.path.join(datadir, path)\n",
"    chunk_size = 1024 * 1024\n",
"\n",
"    for attempt in range(max_retries + 1):\n",
"        try:\n",
"            response = urlopen(raw_request, timeout=timeout)\n",
"            try:\n",
"                with open(target, 'wb') as fo:\n",
"                    while True:\n",
"                        chunk = response.read(chunk_size)\n",
"                        if not chunk:\n",
"                            break\n",
"                        fo.write(chunk)\n",
"            finally:\n",
"                response.close()\n",
"            return\n",
"\n",
"        except URLError as e:\n",
"            print('Error retrieving {0}. Got an error code: {1}'.format(raw_request, e))\n",
"            return\n",
"\n",
"        except socket.error:\n",
"            # do not leave a truncated file behind; a retry starts from scratch\n",
"            if os.path.isfile(target):\n",
"                os.remove(target)\n",
"            if attempt < max_retries:\n",
"                print('Socket error. Retrying...')\n",
"\n",
"    print('Socket error. Giving up on {0} after {1} retries.'.format(path, max_retries))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\tdownloading BJ-HSR1_S1_L001_R1_001.fastq.gz\t...\n",
"\tdownloading BJ-HSR1_S1_L001_R2_001.fastq.gz\t...\n",
"\tdownloading BJ-HSR1_S1_L002_R1_001.fastq.gz\t...\n",
"\tdownloading BJ-HSR1_S1_L002_R2_001.fastq.gz\t...\n",
"\tdownloading BJ-HSR1_S1_L003_R1_001.fastq.gz\t...\n",
"\tdownloading BJ-HSR1_S1_L003_R2_001.fastq.gz\t...\n",
"\tdownloading BJ-HSR1_S1_L004_R1_001.fastq.gz\t...\n",
"\tdownloading BJ-HSR1_S1_L004_R2_001.fastq.gz\t...\n",
"\tdownloading MRC-9-HSR1_S2_L001_R1_001.fastq.gz\t...\n",
"\tdownloading MRC-9-HSR1_S2_L001_R2_001.fastq.gz\t...\n",
"\tdownloading MRC-9-HSR1_S2_L002_R1_001.fastq.gz\t...\n",
"\tdownloading MRC-9-HSR1_S2_L002_R2_001.fastq.gz\t...\n",
"\tdownloading MRC-9-HSR1_S2_L003_R1_001.fastq.gz\t...\n",
"\tdownloading MRC-9-HSR1_S2_L003_R2_001.fastq.gz\t...\n",
"\tdownloading MRC-9-HSR1_S2_L004_R1_001.fastq.gz\t...\n",
"\tdownloading MRC-9-HSR1_S2_L004_R2_001.fastq.gz\t...\n",
"\tdownloading MRC9-HSP70_S3_L001_R1_001.fastq.gz\t...\n",
"\tdownloading MRC9-HSP70_S3_L001_R2_001.fastq.gz\t...\n",
"\tdownloading MRC9-HSP70_S3_L002_R1_001.fastq.gz\t...\n",
"\tdownloading MRC9-HSP70_S3_L002_R2_001.fastq.gz\t...\n",
"\tdownloading MRC9-HSP70_S3_L003_R1_001.fastq.gz\t...\n",
"\tdownloading MRC9-HSP70_S3_L003_R2_001.fastq.gz\t...\n",
"\tdownloading MRC9-HSP70_S3_L004_R1_001.fastq.gz\t...\n",
"\tdownloading MRC9-HSP70_S3_L004_R2_001.fastq.gz\t...\n"
]
}
],
"source": [
"# Href and Path of every file that belongs to the requested samples\n",
"hrefs = []\n",
"paths = []\n",
"\n",
"for sample_id in samples:\n",
"    request = '{0}/{1}/files?access_token={2}'.format(sample_base_url, sample_id, access_token)\n",
"    json_obj = rest_request(request)\n",
"    if json_obj is None:\n",
"        # the listing request failed (rest_request printed the error); skip this sample\n",
"        continue\n",
"    for file_item in json_obj['Response']['Items']:\n",
"        hrefs.append(file_item['Href'])\n",
"        paths.append(file_item['Path'])\n",
"\n",
"for href, path in izip(hrefs, paths):\n",
"    # HTTPS, because the access token is part of the query string\n",
"    request = 'https://api.basespace.illumina.com/{0}/content?access_token={1}'.format(href, access_token)\n",
"    print('\\tdownloading {0}\\t...'.format(path))\n",
"    # flush before the download so the progress line is visible while it runs\n",
"    sys.stdout.flush()\n",
"    download_rest_request(request, path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment