victorlin/coronaviridae_sra_experiments.ipynb

## coronaviridae_sra_experiments.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Bio import Entrez\n",
    "Entrez.email = \"<>@gmail.com\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### search for all SRA samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2749 experiments\n"
     ]
    }
   ],
   "source": [
    "# Coronaviridae - https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=11118\n",
    "sra_search_term = \"txid11118[Organism:exp]\"\n",
    "with Entrez.esearch(db=\"sra\", term=sra_search_term, retmax=3000) as handle_betacov:\n",
    "    record = Entrez.read(handle_betacov)\n",
    "    num_experiments = int(record['Count'])\n",
    "    uid_list = record['IdList']\n",
    "    print(f'{num_experiments} experiments')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fetching record 1 to 100\n",
      "Fetching record 101 to 200\n",
      "Fetching record 201 to 300\n",
      "Fetching record 301 to 400\n",
      "Fetching record 401 to 500\n",
      "Fetching record 501 to 600\n",
      "Fetching record 601 to 700\n",
      "Fetching record 701 to 800\n",
      "Fetching record 801 to 900\n",
      "Fetching record 901 to 1000\n",
      "Fetching record 1001 to 1100\n",
      "Fetching record 1101 to 1200\n",
      "Fetching record 1201 to 1300\n",
      "Fetching record 1301 to 1400\n",
      "Fetching record 1401 to 1500\n",
      "Fetching record 1501 to 1600\n",
      "Fetching record 1601 to 1700\n",
      "Fetching record 1701 to 1800\n",
      "Fetching record 1801 to 1900\n",
      "Fetching record 1901 to 2000\n",
      "Fetching record 2001 to 2100\n",
      "Fetching record 2101 to 2200\n",
      "Fetching record 2201 to 2300\n",
      "Fetching record 2301 to 2400\n",
      "Fetching record 2401 to 2500\n",
      "Fetching record 2501 to 2600\n",
      "Fetching record 2601 to 2700\n",
      "Fetching record 2701 to 2749\n"
     ]
    }
   ],
   "source": [
    "batch_size = 100\n",
    "experiments = []\n",
    "for start in range(0, num_experiments, batch_size):\n",
    "    end = min(num_experiments, start + batch_size)\n",
    "    print(f\"Fetching record {start + 1} to {end}\")\n",
    "    uids = uid_list[start:end]\n",
    "    with Entrez.esummary(db=\"sra\", id=','.join(uids)) as handle_esummary:\n",
    "        data = Entrez.read(handle_esummary)\n",
    "        experiments.extend(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_experiment_id(exp_xml):\n",
    "    idx_exp_tag = exp_xml.find('<Experiment')\n",
    "    start = idx_exp_tag + 17\n",
    "    end = idx_exp_tag + 27\n",
    "    return exp_xml[start:end]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "exp_ids = [get_experiment_id(exp['ExpXml']) for exp in experiments]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pysradb import SRAweb\n",
    "db = SRAweb()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>study_accession</th>\n",
       "      <th>experiment_accession</th>\n",
       "      <th>experiment_title</th>\n",
       "      <th>experiment_desc</th>\n",
       "      <th>organism_taxid</th>\n",
       "      <th>organism_name</th>\n",
       "      <th>library_strategy</th>\n",
       "      <th>library_source</th>\n",
       "      <th>library_selection</th>\n",
       "      <th>sample_accession</th>\n",
       "      <th>sample_title</th>\n",
       "      <th>instrument</th>\n",
       "      <th>total_spots</th>\n",
       "      <th>total_size</th>\n",
       "      <th>run_accession</th>\n",
       "      <th>run_total_spots</th>\n",
       "      <th>run_total_bases</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ERP001119</td>\n",
       "      <td>ERX207938</td>\n",
       "      <td></td>\n",
       "      <td>Metagenomics of Betacoronavirus</td>\n",
       "      <td>694002</td>\n",
       "      <td>Betacoronavirus</td>\n",
       "      <td>AMPLICON</td>\n",
       "      <td>GENOMIC</td>\n",
       "      <td>PCR</td>\n",
       "      <td>ERS184553</td>\n",
       "      <td></td>\n",
       "      <td>Illumina MiSeq</td>\n",
       "      <td>6555345</td>\n",
       "      <td>1248079055</td>\n",
       "      <td>ERR233433</td>\n",
       "      <td>6555345</td>\n",
       "      <td>1966603500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SRP254080</td>\n",
       "      <td>SRX8004780</td>\n",
       "      <td>RNA-Seq of recombinant SARS-CoV-2: nucleotides...</td>\n",
       "      <td>RNA-Seq of recombinant SARS-CoV-2: nucleotides...</td>\n",
       "      <td>2697049</td>\n",
       "      <td>Severe acute respiratory syndrome coronavirus 2</td>\n",
       "      <td>RNA-Seq</td>\n",
       "      <td>TRANSCRIPTOMIC</td>\n",
       "      <td>RANDOM</td>\n",
       "      <td>SRS6378608</td>\n",
       "      <td></td>\n",
       "      <td>Illumina NovaSeq 6000</td>\n",
       "      <td>63592467</td>\n",
       "      <td>6219772044</td>\n",
       "      <td>SRR11426414</td>\n",
       "      <td>63592467</td>\n",
       "      <td>18753570814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SRP254080</td>\n",
       "      <td>SRX8004779</td>\n",
       "      <td>RNA-Seq of recombinant SARS-CoV-2: SARS-CoV-mo...</td>\n",
       "      <td>RNA-Seq of recombinant SARS-CoV-2: SARS-CoV-mo...</td>\n",
       "      <td>2697049</td>\n",
       "      <td>Severe acute respiratory syndrome coronavirus 2</td>\n",
       "      <td>RNA-Seq</td>\n",
       "      <td>TRANSCRIPTOMIC</td>\n",
       "      <td>RANDOM</td>\n",
       "      <td>SRS6378607</td>\n",
       "      <td></td>\n",
       "      <td>Illumina NovaSeq 6000</td>\n",
       "      <td>65892888</td>\n",
       "      <td>6384922546</td>\n",
       "      <td>SRR11426415</td>\n",
       "      <td>65892888</td>\n",
       "      <td>19482216314</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>SRP254080</td>\n",
       "      <td>SRX8004778</td>\n",
       "      <td>RNA-Seq of recombinant SARS-CoV-2: GFP in ORF7a</td>\n",
       "      <td>RNA-Seq of recombinant SARS-CoV-2: GFP in ORF7a</td>\n",
       "      <td>2697049</td>\n",
       "      <td>Severe acute respiratory syndrome coronavirus 2</td>\n",
       "      <td>RNA-Seq</td>\n",
       "      <td>TRANSCRIPTOMIC</td>\n",
       "      <td>RANDOM</td>\n",
       "      <td>SRS6378606</td>\n",
       "      <td></td>\n",
       "      <td>Illumina NovaSeq 6000</td>\n",
       "      <td>68805012</td>\n",
       "      <td>6711704333</td>\n",
       "      <td>SRR11426416</td>\n",
       "      <td>68805012</td>\n",
       "      <td>20332189030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>SRP254080</td>\n",
       "      <td>SRX8004777</td>\n",
       "      <td>RNA-Seq of recombinant SARS-CoV-2: nucleotides...</td>\n",
       "      <td>RNA-Seq of recombinant SARS-CoV-2: nucleotides...</td>\n",
       "      <td>2697049</td>\n",
       "      <td>Severe acute respiratory syndrome coronavirus 2</td>\n",
       "      <td>RNA-Seq</td>\n",
       "      <td>TRANSCRIPTOMIC</td>\n",
       "      <td>RANDOM</td>\n",
       "      <td>SRS6378605</td>\n",
       "      <td></td>\n",
       "      <td>Illumina NovaSeq 6000</td>\n",
       "      <td>72726462</td>\n",
       "      <td>7001420132</td>\n",
       "      <td>SRR11426417</td>\n",
       "      <td>72726462</td>\n",
       "      <td>21378281198</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  study_accession experiment_accession  \\\n",
       "0       ERP001119            ERX207938   \n",
       "1       SRP254080           SRX8004780   \n",
       "2       SRP254080           SRX8004779   \n",
       "3       SRP254080           SRX8004778   \n",
       "4       SRP254080           SRX8004777   \n",
       "\n",
       "                                    experiment_title  \\\n",
       "0                                                      \n",
       "1  RNA-Seq of recombinant SARS-CoV-2: nucleotides...   \n",
       "2  RNA-Seq of recombinant SARS-CoV-2: SARS-CoV-mo...   \n",
       "3    RNA-Seq of recombinant SARS-CoV-2: GFP in ORF7a   \n",
       "4  RNA-Seq of recombinant SARS-CoV-2: nucleotides...   \n",
       "\n",
       "                                     experiment_desc organism_taxid   \\\n",
       "0                    Metagenomics of Betacoronavirus          694002   \n",
       "1  RNA-Seq of recombinant SARS-CoV-2: nucleotides...         2697049   \n",
       "2  RNA-Seq of recombinant SARS-CoV-2: SARS-CoV-mo...         2697049   \n",
       "3    RNA-Seq of recombinant SARS-CoV-2: GFP in ORF7a         2697049   \n",
       "4  RNA-Seq of recombinant SARS-CoV-2: nucleotides...         2697049   \n",
       "\n",
       "                                     organism_name library_strategy  \\\n",
       "0                                  Betacoronavirus         AMPLICON   \n",
       "1  Severe acute respiratory syndrome coronavirus 2          RNA-Seq   \n",
       "2  Severe acute respiratory syndrome coronavirus 2          RNA-Seq   \n",
       "3  Severe acute respiratory syndrome coronavirus 2          RNA-Seq   \n",
       "4  Severe acute respiratory syndrome coronavirus 2          RNA-Seq   \n",
       "\n",
       "   library_source library_selection sample_accession sample_title  \\\n",
       "0         GENOMIC               PCR        ERS184553                \n",
       "1  TRANSCRIPTOMIC            RANDOM       SRS6378608                \n",
       "2  TRANSCRIPTOMIC            RANDOM       SRS6378607                \n",
       "3  TRANSCRIPTOMIC            RANDOM       SRS6378606                \n",
       "4  TRANSCRIPTOMIC            RANDOM       SRS6378605                \n",
       "\n",
       "              instrument total_spots  total_size run_accession  \\\n",
       "0         Illumina MiSeq     6555345  1248079055     ERR233433   \n",
       "1  Illumina NovaSeq 6000    63592467  6219772044   SRR11426414   \n",
       "2  Illumina NovaSeq 6000    65892888  6384922546   SRR11426415   \n",
       "3  Illumina NovaSeq 6000    68805012  6711704333   SRR11426416   \n",
       "4  Illumina NovaSeq 6000    72726462  7001420132   SRR11426417   \n",
       "\n",
       "  run_total_spots run_total_bases  \n",
       "0         6555345      1966603500  \n",
       "1        63592467     18753570814  \n",
       "2        65892888     19482216314  \n",
       "3        68805012     20332189030  \n",
       "4        72726462     21378281198  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = db.sra_metadata(exp_ids)\n",
    "df.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"from Bio import Entrez\n",
	"Entrez.email = \"<>@gmail.com\""
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### search for all SRA samples"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2749 experiments\n"
	]
	}
	],
	"source": [
	"# Coronaviridae - https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=11118\n",
	"sra_search_term = \"txid11118[Organism:exp]\"\n",
	"with Entrez.esearch(db=\"sra\", term=sra_search_term, retmax=3000) as handle_betacov:\n",
	" record = Entrez.read(handle_betacov)\n",
	" num_experiments = int(record['Count'])\n",
	" uid_list = record['IdList']\n",
	" print(f'{num_experiments} experiments')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Fetching record 1 to 100\n",
	"Fetching record 101 to 200\n",
	"Fetching record 201 to 300\n",
	"Fetching record 301 to 400\n",
	"Fetching record 401 to 500\n",
	"Fetching record 501 to 600\n",
	"Fetching record 601 to 700\n",
	"Fetching record 701 to 800\n",
	"Fetching record 801 to 900\n",
	"Fetching record 901 to 1000\n",
	"Fetching record 1001 to 1100\n",
	"Fetching record 1101 to 1200\n",
	"Fetching record 1201 to 1300\n",
	"Fetching record 1301 to 1400\n",
	"Fetching record 1401 to 1500\n",
	"Fetching record 1501 to 1600\n",
	"Fetching record 1601 to 1700\n",
	"Fetching record 1701 to 1800\n",
	"Fetching record 1801 to 1900\n",
	"Fetching record 1901 to 2000\n",
	"Fetching record 2001 to 2100\n",
	"Fetching record 2101 to 2200\n",
	"Fetching record 2201 to 2300\n",
	"Fetching record 2301 to 2400\n",
	"Fetching record 2401 to 2500\n",
	"Fetching record 2501 to 2600\n",
	"Fetching record 2601 to 2700\n",
	"Fetching record 2701 to 2749\n"
	]
	}
	],
	"source": [
	"batch_size = 100\n",
	"experiments = []\n",
	"for start in range(0, num_experiments, batch_size):\n",
	" end = min(num_experiments, start + batch_size)\n",
	" print(f\"Fetching record {start + 1} to {end}\")\n",
	" uids = uid_list[start:end]\n",
	" with Entrez.esummary(db=\"sra\", id=','.join(uids)) as handle_esummary:\n",
	" data = Entrez.read(handle_esummary)\n",
	" experiments.extend(data)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"def get_experiment_id(exp_xml):\n",
	" idx_exp_tag = exp_xml.find('<Experiment')\n",
	" start = idx_exp_tag + 17\n",
	" end = idx_exp_tag + 27\n",
	" return exp_xml[start:end]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"exp_ids = [get_experiment_id(exp['ExpXml']) for exp in experiments]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"from pysradb import SRAweb\n",
	"db = SRAweb()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>study_accession</th>\n",
	" <th>experiment_accession</th>\n",
	" <th>experiment_title</th>\n",
	" <th>experiment_desc</th>\n",
	" <th>organism_taxid</th>\n",
	" <th>organism_name</th>\n",
	" <th>library_strategy</th>\n",
	" <th>library_source</th>\n",
	" <th>library_selection</th>\n",
	" <th>sample_accession</th>\n",
	" <th>sample_title</th>\n",
	" <th>instrument</th>\n",
	" <th>total_spots</th>\n",
	" <th>total_size</th>\n",
	" <th>run_accession</th>\n",
	" <th>run_total_spots</th>\n",
	" <th>run_total_bases</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>ERP001119</td>\n",
	" <td>ERX207938</td>\n",
	" <td></td>\n",
	" <td>Metagenomics of Betacoronavirus</td>\n",
	" <td>694002</td>\n",
	" <td>Betacoronavirus</td>\n",
	" <td>AMPLICON</td>\n",
	" <td>GENOMIC</td>\n",
	" <td>PCR</td>\n",
	" <td>ERS184553</td>\n",
	" <td></td>\n",
	" <td>Illumina MiSeq</td>\n",
	" <td>6555345</td>\n",
	" <td>1248079055</td>\n",
	" <td>ERR233433</td>\n",
	" <td>6555345</td>\n",
	" <td>1966603500</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>SRP254080</td>\n",
	" <td>SRX8004780</td>\n",
	" <td>RNA-Seq of recombinant SARS-CoV-2: nucleotides...</td>\n",
	" <td>RNA-Seq of recombinant SARS-CoV-2: nucleotides...</td>\n",
	" <td>2697049</td>\n",
	" <td>Severe acute respiratory syndrome coronavirus 2</td>\n",
	" <td>RNA-Seq</td>\n",
	" <td>TRANSCRIPTOMIC</td>\n",
	" <td>RANDOM</td>\n",
	" <td>SRS6378608</td>\n",
	" <td></td>\n",
	" <td>Illumina NovaSeq 6000</td>\n",
	" <td>63592467</td>\n",
	" <td>6219772044</td>\n",
	" <td>SRR11426414</td>\n",
	" <td>63592467</td>\n",
	" <td>18753570814</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>SRP254080</td>\n",
	" <td>SRX8004779</td>\n",
	" <td>RNA-Seq of recombinant SARS-CoV-2: SARS-CoV-mo...</td>\n",
	" <td>RNA-Seq of recombinant SARS-CoV-2: SARS-CoV-mo...</td>\n",
	" <td>2697049</td>\n",
	" <td>Severe acute respiratory syndrome coronavirus 2</td>\n",
	" <td>RNA-Seq</td>\n",
	" <td>TRANSCRIPTOMIC</td>\n",
	" <td>RANDOM</td>\n",
	" <td>SRS6378607</td>\n",
	" <td></td>\n",
	" <td>Illumina NovaSeq 6000</td>\n",
	" <td>65892888</td>\n",
	" <td>6384922546</td>\n",
	" <td>SRR11426415</td>\n",
	" <td>65892888</td>\n",
	" <td>19482216314</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>SRP254080</td>\n",
	" <td>SRX8004778</td>\n",
	" <td>RNA-Seq of recombinant SARS-CoV-2: GFP in ORF7a</td>\n",
	" <td>RNA-Seq of recombinant SARS-CoV-2: GFP in ORF7a</td>\n",
	" <td>2697049</td>\n",
	" <td>Severe acute respiratory syndrome coronavirus 2</td>\n",
	" <td>RNA-Seq</td>\n",
	" <td>TRANSCRIPTOMIC</td>\n",
	" <td>RANDOM</td>\n",
	" <td>SRS6378606</td>\n",
	" <td></td>\n",
	" <td>Illumina NovaSeq 6000</td>\n",
	" <td>68805012</td>\n",
	" <td>6711704333</td>\n",
	" <td>SRR11426416</td>\n",
	" <td>68805012</td>\n",
	" <td>20332189030</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>SRP254080</td>\n",
	" <td>SRX8004777</td>\n",
	" <td>RNA-Seq of recombinant SARS-CoV-2: nucleotides...</td>\n",
	" <td>RNA-Seq of recombinant SARS-CoV-2: nucleotides...</td>\n",
	" <td>2697049</td>\n",
	" <td>Severe acute respiratory syndrome coronavirus 2</td>\n",
	" <td>RNA-Seq</td>\n",
	" <td>TRANSCRIPTOMIC</td>\n",
	" <td>RANDOM</td>\n",
	" <td>SRS6378605</td>\n",
	" <td></td>\n",
	" <td>Illumina NovaSeq 6000</td>\n",
	" <td>72726462</td>\n",
	" <td>7001420132</td>\n",
	" <td>SRR11426417</td>\n",
	" <td>72726462</td>\n",
	" <td>21378281198</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" study_accession experiment_accession \\\n",
	"0 ERP001119 ERX207938 \n",
	"1 SRP254080 SRX8004780 \n",
	"2 SRP254080 SRX8004779 \n",
	"3 SRP254080 SRX8004778 \n",
	"4 SRP254080 SRX8004777 \n",
	"\n",
	" experiment_title \\\n",
	"0 \n",
	"1 RNA-Seq of recombinant SARS-CoV-2: nucleotides... \n",
	"2 RNA-Seq of recombinant SARS-CoV-2: SARS-CoV-mo... \n",
	"3 RNA-Seq of recombinant SARS-CoV-2: GFP in ORF7a \n",
	"4 RNA-Seq of recombinant SARS-CoV-2: nucleotides... \n",
	"\n",
	" experiment_desc organism_taxid \\\n",
	"0 Metagenomics of Betacoronavirus 694002 \n",
	"1 RNA-Seq of recombinant SARS-CoV-2: nucleotides... 2697049 \n",
	"2 RNA-Seq of recombinant SARS-CoV-2: SARS-CoV-mo... 2697049 \n",
	"3 RNA-Seq of recombinant SARS-CoV-2: GFP in ORF7a 2697049 \n",
	"4 RNA-Seq of recombinant SARS-CoV-2: nucleotides... 2697049 \n",
	"\n",
	" organism_name library_strategy \\\n",
	"0 Betacoronavirus AMPLICON \n",
	"1 Severe acute respiratory syndrome coronavirus 2 RNA-Seq \n",
	"2 Severe acute respiratory syndrome coronavirus 2 RNA-Seq \n",
	"3 Severe acute respiratory syndrome coronavirus 2 RNA-Seq \n",
	"4 Severe acute respiratory syndrome coronavirus 2 RNA-Seq \n",
	"\n",
	" library_source library_selection sample_accession sample_title \\\n",
	"0 GENOMIC PCR ERS184553 \n",
	"1 TRANSCRIPTOMIC RANDOM SRS6378608 \n",
	"2 TRANSCRIPTOMIC RANDOM SRS6378607 \n",
	"3 TRANSCRIPTOMIC RANDOM SRS6378606 \n",
	"4 TRANSCRIPTOMIC RANDOM SRS6378605 \n",
	"\n",
	" instrument total_spots total_size run_accession \\\n",
	"0 Illumina MiSeq 6555345 1248079055 ERR233433 \n",
	"1 Illumina NovaSeq 6000 63592467 6219772044 SRR11426414 \n",
	"2 Illumina NovaSeq 6000 65892888 6384922546 SRR11426415 \n",
	"3 Illumina NovaSeq 6000 68805012 6711704333 SRR11426416 \n",
	"4 Illumina NovaSeq 6000 72726462 7001420132 SRR11426417 \n",
	"\n",
	" run_total_spots run_total_bases \n",
	"0 6555345 1966603500 \n",
	"1 63592467 18753570814 \n",
	"2 65892888 19482216314 \n",
	"3 68805012 20332189030 \n",
	"4 72726462 21378281198 "
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df = db.sra_metadata(exp_ids)\n",
	"df.head()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}