Skip to content

Instantly share code, notes, and snippets.

@pansapiens
Created January 22, 2018 04:01
Show Gist options
  • Save pansapiens/5d91380baba954bfd3c57238d7321dd3 to your computer and use it in GitHub Desktop.
Save pansapiens/5d91380baba954bfd3c57238d7321dd3 to your computer and use it in GitHub Desktop.
Interacting with the ENA REST API
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"# %pip installs into the environment of the running kernel, whereas `!pip` runs\n",
"# whichever pip executable happens to be first on PATH.\n",
"%pip install -q lxml enasearch"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import csv\n",
"from lxml import etree\n",
"from collections import OrderedDict"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"# Download the ENA record for one accession as XML and parse it.\n",
"# Alternative accession left by the author: 'SRR950078'.\n",
"accession = 'PRJNA214799'\n",
"record_url = f'https://www.ebi.ac.uk/ena/data/view/{accession}&display=xml&download=xml'\n",
"record = requests.get(record_url)\n",
"# Stop on an HTTP error instead of handing an error page to the XML parser.\n",
"record.raise_for_status()\n",
"xml = etree.fromstring(record.content)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"# Text of the ID element that follows a DB element reading 'ENA-FASTQ-FILES'\n",
"# inside an XREF_LINK; the first match is used.\n",
"fastq_files_xpath = \"//XREF_LINK/DB[text()='ENA-FASTQ-FILES']/following-sibling::ID/text()\"\n",
"fastq_table_url = xml.xpath(fastq_files_xpath)[0]"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"# Download the report that the record links to.\n",
"fastq_table = requests.get(fastq_table_url)\n",
"# Stop on an HTTP error so an error page is never parsed as a table later on.\n",
"fastq_table.raise_for_status()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"# Read the tab-separated report into one dict per row, keyed by the header names.\n",
"report_lines = fastq_table.text.splitlines()\n",
"table = list(csv.DictReader(report_lines, delimiter='\\t'))"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950078/SRR950078_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950078/SRR950078_2.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950079/SRR950079_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950079/SRR950079_2.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950080/SRR950080_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950080/SRR950080_2.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950081/SRR950081_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950081/SRR950081_2.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950082/SRR950082_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950082/SRR950082_2.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950083/SRR950083_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950083/SRR950083_2.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950084/SRR950084_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950084/SRR950084_2.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950085/SRR950085_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950085/SRR950085_2.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950086/SRR950086_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950086/SRR950086_2.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950087/SRR950087_1.fastq.gz', 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950087/SRR950087_2.fastq.gz']\n"
]
}
],
"source": [
"# Gather every FASTQ link in the report; a row with several files separates them with ';'.\n",
"urls = []\n",
"for rec in table:\n",
"    # str.split gives a one-item list when the field holds no ';' at all.\n",
"    ftp_links = rec['fastq_ftp'].split(';')\n",
"    urls.extend(ftp_links)\n",
"\n",
"print(urls)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"def parse_ena_fastq_table(table):\n",
"    \"\"\"Parse an ENA FASTQ file report (tab-separated text) into a dict keyed by file URL.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    table : str\n",
"        Report text including its header row. The columns `fastq_ftp`,\n",
"        `fastq_md5` and `fastq_bytes` are read by name; the value of the\n",
"        first column is used as the accession of the row.\n",
"\n",
"    Returns\n",
"    -------\n",
"    dict\n",
"        Maps each FASTQ URL to an OrderedDict with the keys 'accession',\n",
"        'md5' and 'size'. All values are kept as the strings found in the report.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If a row is read and one of the three named columns is absent.\n",
"    \"\"\"\n",
"    by_url = dict()\n",
"    for rec in csv.DictReader(table.splitlines(), delimiter='\\t'):\n",
"        # Rows without any FASTQ link contribute nothing.\n",
"        if not rec['fastq_ftp']:\n",
"            continue\n",
"\n",
"        # The first column carries the accession, whatever its header is called.\n",
"        accession = next(iter(rec.values()))\n",
"\n",
"        # Several files in one row are separated by ';'. Splitting unconditionally\n",
"        # turns a single-file row into one-item lists, so zip() pairs whole values\n",
"        # instead of walking the strings character by character.\n",
"        links = rec['fastq_ftp'].split(';')\n",
"        checksums = rec['fastq_md5'].split(';')\n",
"        sizes = rec['fastq_bytes'].split(';')\n",
"\n",
"        for url, md5, size in zip(links, checksums, sizes):\n",
"            by_url[url] = OrderedDict([('accession', accession),\n",
"                                       ('md5', md5),\n",
"                                       ('size', size)])\n",
"\n",
"    return by_url"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"def get_ena_fastq_urls(accession):\n",
"    \"\"\"Look up the FASTQ files that ENA lists for an accession.\n",
"\n",
"    The XML record of the accession is downloaded, every ENA-FASTQ-FILES\n",
"    cross-reference in it is followed, and each linked report is parsed\n",
"    with `parse_ena_fastq_table`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    accession : str\n",
"        Accession inserted into the ENA record URL, e.g. 'PRJNA214799'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of dict\n",
"        One `parse_ena_fastq_table` result per ENA-FASTQ-FILES link found\n",
"        in the record; empty when the record holds no such link.\n",
"\n",
"    Raises\n",
"    ------\n",
"    requests.HTTPError\n",
"        If the record or one of the linked reports answers with an HTTP\n",
"        error status.\n",
"    \"\"\"\n",
"    record_url = f'https://www.ebi.ac.uk/ena/data/view/{accession}&display=xml&download=xml'\n",
"    record = requests.get(record_url)\n",
"    # Stop on an HTTP error instead of handing an error page to the XML parser.\n",
"    record.raise_for_status()\n",
"    xml = etree.fromstring(record.content)\n",
"\n",
"    # Text of each ID element that follows a DB element reading 'ENA-FASTQ-FILES'.\n",
"    xref_db_links = xml.xpath(\"//XREF_LINK/DB[text()='ENA-FASTQ-FILES']/following-sibling::ID/text()\")\n",
"\n",
"    urls = []\n",
"    for fastq_table_url in xref_db_links:\n",
"        table = requests.get(fastq_table_url)\n",
"        # Same reasoning as above: never parse an error page as a report.\n",
"        table.raise_for_status()\n",
"        urls.append(parse_ena_fastq_table(table.text))\n",
"\n",
"    return urls"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950078/SRR950078_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950078'),\n",
" ('md5', 'eee21620ca17744147ff66cdd2529066'),\n",
" ('size', '8584375694')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950078/SRR950078_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950078'),\n",
" ('md5', '39763f20027f17eb83ab00dc7d2da65c'),\n",
" ('size', '8650136401')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950079/SRR950079_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950079'),\n",
" ('md5', '32f2b4790562d70279f3962ba33f06d1'),\n",
" ('size', '9414809474')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950079/SRR950079_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950079'),\n",
" ('md5', 'b01fd1ad3281d12c295d607d6589db13'),\n",
" ('size', '9508216711')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950080/SRR950080_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950080'),\n",
" ('md5', 'f02a0c4c9c1cdcf68b70814252103550'),\n",
" ('size', '7845415151')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950080/SRR950080_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950080'),\n",
" ('md5', 'df1cff93a09aa299a2e7e7f95f5d61a0'),\n",
" ('size', '7912973223')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950081/SRR950081_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950081'),\n",
" ('md5', 'd3eb0c891821311adce0fa7cbd0c5fdd'),\n",
" ('size', '9661234164')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950081/SRR950081_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950081'),\n",
" ('md5', '04fcb6b3403f218735d35d877a11167a'),\n",
" ('size', '9758348136')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950082/SRR950082_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950082'),\n",
" ('md5', '7aa936ff71e33342543dc1ce803ac12e'),\n",
" ('size', '5632947596')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950082/SRR950082_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950082'),\n",
" ('md5', 'e823aefdb9caffbdda9ba7af5ae85351'),\n",
" ('size', '5695733721')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950083/SRR950083_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950083'),\n",
" ('md5', '3df22a3df39679ac9b905eaf27780762'),\n",
" ('size', '9673029260')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950083/SRR950083_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950083'),\n",
" ('md5', '62c691bb6f9cb4ff4d98b86674571b8f'),\n",
" ('size', '9766566648')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950084/SRR950084_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950084'),\n",
" ('md5', 'e257b06a0808d759d21f9e1bb4148e7a'),\n",
" ('size', '10653669994')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950084/SRR950084_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950084'),\n",
" ('md5', 'b8fde78ac074f39fa18828628bb1fd9e'),\n",
" ('size', '10756207988')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950085/SRR950085_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950085'),\n",
" ('md5', '4c966e1c6a829d14d6df9d26655bb40c'),\n",
" ('size', '8069929116')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950085/SRR950085_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950085'),\n",
" ('md5', 'ca5526243f8ca8841bdbba94c20c70d9'),\n",
" ('size', '8140508542')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950086/SRR950086_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950086'),\n",
" ('md5', '3d138903b0edd6964b4672cc618930ab'),\n",
" ('size', '6179091639')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950086/SRR950086_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950086'),\n",
" ('md5', 'fffa2e5f241622b2ea052306febcce5e'),\n",
" ('size', '6257220013')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950087/SRR950087_1.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950087'),\n",
" ('md5', 'f54fbb14017fdc55c9ab4acdafa05e52'),\n",
" ('size', '5196257989')]),\n",
" 'ftp.sra.ebi.ac.uk/vol1/fastq/SRR950/SRR950087/SRR950087_2.fastq.gz': OrderedDict([('accession',\n",
" 'SRR950087'),\n",
" ('md5', 'd1eb50d9d458e09bcda66e8209d7765f'),\n",
" ('size', '5225837153')])}]"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_ena_fastq_urls('PRJNA214799')"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"fastq_urls = get_ena_fastq_urls('SRR950078')"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"import enasearch"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"result = enasearch.retrieve_data(ids='PRJNA214799', display=\"xml\")"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"ROOT\": {\n",
" \"@request\": \"PRJNA214799&display=xml&expanded=false&header=false\",\n",
" \"PROJECT\": {\n",
" \"@accession\": \"PRJNA214799\",\n",
" \"IDENTIFIERS\": {\n",
" \"PRIMARY_ID\": \"PRJNA214799\",\n",
" \"SECONDARY_ID\": \"SRP028705\",\n",
" \"EXTERNAL_ID\": {\n",
" \"@namespace\": \"GEO\",\n",
" \"#text\": \"GSE49712\"\n",
" }\n",
" },\n",
" \"NAME\": \"Homo sapiens\",\n",
" \"TITLE\": \"Comprehensive evaluation of differential gene expression analysis methods for RNA-seq data\",\n",
" \"DESCRIPTION\": \"A large number of computational methods have been recently developed for analyzing differential gene expression (DE) in RNA-seq data. We report on a comprehensive evaluation of the commonly used DE methods using the SEQC benchmark data set and data from ENCODE project. We evaluated a number of key features including: normalization, accuracy of DE detection and DE analysis when one condition has no detectable expression. We found significant differences among the methods. Furthermore, computational methods designed for DE detection from expression array data perform comparably to methods customized for RNA-seq. Most importantly, our results demonstrate that increasing the number of replicate samples significantly improves detection power over increased sequencing depth. Overall design: The Sequencing Quality Control Consortium generated two datasets from two reference RNA samples in order to evaluate transcriptome profiling by next-generation sequencing technology. Each sample contains one of the reference RNA source and a set of synthetic RNAs from the External RNA Control Consortium (ERCC) at known concentrations. Group A contains 5 replicates of the Strategene Universal Human Reference RNA (UHRR), which is composed of total RNA from 10 human cell lines, with 2% by volume of ERCC mix 1. Group B includes 5 replicate samples of the Ambion Human Brain Reference RNA (HBRR) with 2% by volume of ERCC mix 2. The ERCC spike-in control is a mixture of 92 synthetic polyadenylated oligonucleotides of 250-2000 nucleotides long that are meant to resemble human transcripts.\",\n",
" \"SUBMISSION_PROJECT\": {\n",
" \"SEQUENCING_PROJECT\": null,\n",
" \"ORGANISM\": {\n",
" \"SCIENTIFIC_NAME\": \"Homo sapiens\",\n",
" \"TAXON_ID\": \"9606\"\n",
" }\n",
" },\n",
" \"RELATED_PROJECTS\": {\n",
" \"RELATED_PROJECT\": {\n",
" \"PARENT_PROJECT\": {\n",
" \"@accession\": \"PRJNA208232\"\n",
" }\n",
" }\n",
" },\n",
" \"PROJECT_LINKS\": {\n",
" \"PROJECT_LINK\": [\n",
" {\n",
" \"XREF_LINK\": {\n",
" \"DB\": \"PUBMED\",\n",
" \"ID\": \"24020486\"\n",
" }\n",
" },\n",
" {\n",
" \"XREF_LINK\": {\n",
" \"DB\": \"ENA-SAMPLE\",\n",
" \"ID\": \"SRS468710-SRS468719\"\n",
" }\n",
" },\n",
" {\n",
" \"XREF_LINK\": {\n",
" \"DB\": \"ENA-EXPERIMENT\",\n",
" \"ID\": \"SRX333347-SRX333356\"\n",
" }\n",
" },\n",
" {\n",
" \"XREF_LINK\": {\n",
" \"DB\": \"ENA-RUN\",\n",
" \"ID\": \"SRR950078-SRR950087\"\n",
" }\n",
" },\n",
" {\n",
" \"XREF_LINK\": {\n",
" \"DB\": \"ENA-FASTQ-FILES\",\n",
" \"ID\": \"http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=PRJNA214799&result=read_run&fields=run_accession,fastq_ftp,fastq_md5,fastq_bytes\"\n",
" }\n",
" },\n",
" {\n",
" \"XREF_LINK\": {\n",
" \"DB\": \"ENA-SUBMITTED-FILES\",\n",
" \"ID\": \"http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=PRJNA214799&result=read_run&fields=run_accession,submitted_ftp,submitted_md5,submitted_bytes,submitted_format\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" \"PROJECT_ATTRIBUTES\": {\n",
" \"PROJECT_ATTRIBUTE\": [\n",
" {\n",
" \"TAG\": \"ENA-REFSEQ\",\n",
" \"VALUE\": \"N\"\n",
" },\n",
" {\n",
" \"TAG\": \"PROJECT-ID\",\n",
" \"VALUE\": \"214799\"\n",
" },\n",
" {\n",
" \"TAG\": \"NCBI-PROJECT-TYPE\",\n",
" \"VALUE\": \"SUBMISSION\"\n",
" },\n",
" {\n",
" \"TAG\": \"ENA-FIRST-PUBLIC\",\n",
" \"VALUE\": \"2013-08-21\"\n",
" },\n",
" {\n",
" \"TAG\": \"ENA-LAST-UPDATE\",\n",
" \"VALUE\": \"2018-01-19\"\n",
" }\n",
" ]\n",
" }\n",
" }\n",
" }\n",
"}\n"
]
}
],
"source": [
"import json\n",
"print(json.dumps(result, indent=2))"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"run_accession\texperiment_accession\tstudy_accession\tinstrument_platform\tlibrary_strategy\tread_count\tfastq_ftp\tfastq_md5\tfastq_bytes\n",
"SRR3038517\tSRX1494510\tPRJNA306661\tILLUMINA\tRNA-Seq\t19105795\tftp.sra.ebi.ac.uk/vol1/fastq/SRR303/007/SRR3038517/SRR3038517_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR303/007/SRR3038517/SRR3038517_2.fastq.gz\t8c1a944fc09156f886785eb64fdc668a;a3f03baf2d0546b5ba7c4eb8c7fec672\t1623547590;1646279139\n",
"SRR3038518\tSRX1494511\tPRJNA306661\tILLUMINA\tRNA-Seq\t20427466\tftp.sra.ebi.ac.uk/vol1/fastq/SRR303/008/SRR3038518/SRR3038518_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR303/008/SRR3038518/SRR3038518_2.fastq.gz\tc2ababebb9d9d33d9219e2c20d053b1d;f11a7e2808b3ab592f971376bd61988e\t1738099933;1754989703\n",
"SRR3038519\tSRX1494512\tPRJNA306661\tILLUMINA\tRNA-Seq\t19969965\tftp.sra.ebi.ac.uk/vol1/fastq/SRR303/009/SRR3038519/SRR3038519_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR303/009/SRR3038519/SRR3038519_2.fastq.gz\t8222e2b4ba4e3c35bcda0e0f7b17649e;e2bafac16f3d14a1d6c2915e86f53469\t1700411630;1715992194\n",
"SRR3038520\tSRX1494513\tPRJNA306661\tILLUMINA\tRNA-Seq\t17638975\tftp.sra.ebi.ac.uk/vol1/fastq/SRR303/000/SRR3038520/SRR3038520_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR303/000/SRR3038520/SRR3038520_2.fastq.gz\ta7886f49d6bdaceddd2b26fa5148fb3f;ed63fef014684b1e080d868735a8678e\t1500818640;1517715832\n",
"SRR3038521\tSRX1494514\tPRJNA306661\tILLUMINA\tRNA-Seq\t18588191\tftp.sra.ebi.ac.uk/vol1/fastq/SRR303/001/SRR3038521/SRR3038521_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR303/001/SRR3038521/SRR3038521_2.fastq.gz\te2eba16f70033d182870093b179848f6;f3b7ff0d6a8c2a3cc6ccdb1c4dc679a2\t1586833813;1602574562\n",
"SRR3038522\tSRX1494515\tPRJNA306661\tILLUMINA\tRNA-Seq\t18063354\tftp.sra.ebi.ac.uk/vol1/fastq/SRR303/002/SRR3038522/SRR3038522_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR303/002/SRR3038522/SRR3038522_2.fastq.gz\te893441ab368797de242f5050dcd5d96;506b95f8c66c9bcfdfd21058eadb1e3b\t1534185823;1549536514\n",
"\n"
]
}
],
"source": [
"table = enasearch.retrieve_run_report(accession='PRJNA306661', fields=\"run_accession,experiment_accession,study_accession,instrument_platform,library_strategy,read_count,fastq_ftp,fastq_md5,fastq_bytes\")\n",
"print(table)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment