Benjamin Langmead BenLangmead

## sra_growth.R
# Fetch the SRA statistics CSV with curl, then plot log10(st$bases) against
# st$date for the rows between the first one reaching 0.5625e16 bases and the
# first one reaching 8.95e16 bases.

# system() returns the command's exit status; stop instead of going on to
# parse a missing or stale /tmp/stats.csv.
status <- system('curl https://trace.ncbi.nlm.nih.gov/Traces/sra/sra_stat.cgi > /tmp/stats.csv')
if (status != 0) {
  stop("curl exited with status ", status, call. = FALSE)
}

# TRUE is spelled out because `T` is an ordinary, reassignable variable.
st <- read.table('/tmp/stats.csv', sep = ',', header = TRUE)
st$date <- as.Date(st$date, format = '%m/%d/%Y')

# Index of the first element of `bases` that is >= `threshold`.
# Fails with a clear message when no element qualifies; a bare
# min(which(...)) would return Inf with only a warning in that case.
first_row_at_least <- function(bases, threshold) {
  hits <- which(bases >= threshold)
  if (length(hits) == 0) {
    stop("no row has bases >= ", threshold, call. = FALSE)
  }
  min(hits)
}

# Each threshold is double the previous one, except the last, which is
# 8.95e16 rather than 9e16.
i <- first_row_at_least(st$bases, 0.5625e16)
id1 <- i
id2 <- first_row_at_least(st$bases, 1.125e16)
id3 <- first_row_at_least(st$bases, 2.25e16)
id4 <- first_row_at_least(st$bases, 4.5e16)
id5 <- first_row_at_least(st$bases, 8.95e16)

window <- id1:id5
plot(st$date[window], log10(st$bases[window]), type = 'l',
     xlab = "Date", ylab = "log10(Total SRA bases)")

## sradbv2.py
#!/usr/bin/env python

# Authors: Chris Wilks (original) and Ben Langmead (modifications)
#    Date: 7/3/2018
# License: MIT

"""sradbv2
Usage:
  sradbv2 search <lucene-search> [options]
  sradbv2 query [<SRP>,<SRR>]...  [options]

## gist:f40ff161c3be517b5b4d8867479fe4ee
> query=paste(
+ 'sample_taxon_id:9606',
+ 'experiment_library_strategy:"rna seq"',
+ 'experiment_library_source:transcriptomic',
+ 'experiment_platform:illumina')
> st <- 0
> SZ <- 500
> df <- sra_full_search(q=query, start=st, size=SZ); st <- st + SZ
https://api-omicidx.cancerdatasci.org/sra/1.0/search/full?q=sample_taxon_id%3A9606%20experiment_library_strategy%3A%22rna%20seq%22%20experiment_library_source%3Atranscriptomic%20experiment_platform%3Aillumina&start=0&size=500
> print(paste('ncol for batch', st, '=', ncol(df)))

## gist:5d6340fd00f67d307efffa5330aab148
R version 3.5.0 (2018-04-23) -- "Joy in Playing"
Copyright (C) 2018 The R Foundation for Statistical Computing
Platform: x86_64-apple-darwin15.6.0 (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

## by_species.R
library('SRAdb')

# If you have SRAmetadb already, set this directory, or setwd appropriately
# to save yourself a large download
sqlfile <- file.path('.', 'SRAmetadb.sqlite')

# Check the same path that was just built rather than repeating the file name.
# Plain `<-` replaces `<<-`: at top level both assign in the global
# environment, and `<<-` is reserved for modifying an enclosing scope.
if (!file.exists(sqlfile)) {
  sqlfile <- getSRAdbFile()
}
sra_con <- dbConnect(SQLite(), sqlfile)

# Run the SQL string `x` on `sra_con` and return what dbGetQuery() returns.
# NOTE(review): this definition masks base::q() (quit) for the session.
q <- function(x) {
  dbGetQuery(sra_con, x)
}

species_to_tax_id = list(

## benchmarks.R
#!/usr/bin/env Rscript

# source('http://bioconductor.org/biocLite.R')
# biocLite('recount')
# biocLite('GenomicRanges')
# biocLite('LieberInstitute/recount.bwtool')

library('GenomicRanges')
library('recount')
library('recount.bwtool')

## CG_Radix
{
 "metadata": {
  "name": "",
  "signature": "sha256:0e059d6141a29e725dcfd5b05c7fa0fe7e20abcd0698fa34124a7c847f09627d"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [

## FASTQ.json
{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {

## FASTA.json
{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {

## CG_Naive.json
{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
	# Fetch the SRA statistics CSV with curl, then plot log10(st$bases) against
	# st$date for the rows between the first one reaching 0.5625e16 bases and the
	# first one reaching 8.95e16 bases.

	# system() returns the command's exit status; stop instead of going on to
	# parse a missing or stale /tmp/stats.csv.
	status <- system('curl https://trace.ncbi.nlm.nih.gov/Traces/sra/sra_stat.cgi > /tmp/stats.csv')
	if (status != 0) {
	  stop("curl exited with status ", status, call. = FALSE)
	}

	# TRUE is spelled out because `T` is an ordinary, reassignable variable.
	st <- read.table('/tmp/stats.csv', sep = ',', header = TRUE)
	st$date <- as.Date(st$date, format = '%m/%d/%Y')

	# Index of the first element of `bases` that is >= `threshold`.
	# Fails with a clear message when no element qualifies; a bare
	# min(which(...)) would return Inf with only a warning in that case.
	first_row_at_least <- function(bases, threshold) {
	  hits <- which(bases >= threshold)
	  if (length(hits) == 0) {
	    stop("no row has bases >= ", threshold, call. = FALSE)
	  }
	  min(hits)
	}

	# Each threshold is double the previous one, except the last, which is
	# 8.95e16 rather than 9e16.
	i <- first_row_at_least(st$bases, 0.5625e16)
	id1 <- i
	id2 <- first_row_at_least(st$bases, 1.125e16)
	id3 <- first_row_at_least(st$bases, 2.25e16)
	id4 <- first_row_at_least(st$bases, 4.5e16)
	id5 <- first_row_at_least(st$bases, 8.95e16)

	window <- id1:id5
	plot(st$date[window], log10(st$bases[window]), type = 'l',
	     xlab = "Date", ylab = "log10(Total SRA bases)")
	#!/usr/bin/env python

	# Authors: Chris Wilks (original) and Ben Langmead (modifications)
	# Date: 7/3/2018
	# License: MIT

	"""sradbv2
	Usage:
	sradbv2 search <lucene-search> [options]
	sradbv2 query [<SRP>,<SRR>]... [options]
	> query=paste(
	+ 'sample_taxon_id:9606',
	+ 'experiment_library_strategy:"rna seq"',
	+ 'experiment_library_source:transcriptomic',
	+ 'experiment_platform:illumina')
	> st <- 0
	> SZ <- 500
	> df <- sra_full_search(q=query, start=st, size=SZ); st <- st + SZ
	https://api-omicidx.cancerdatasci.org/sra/1.0/search/full?q=sample_taxon_id%3A9606%20experiment_library_strategy%3A%22rna%20seq%22%20experiment_library_source%3Atranscriptomic%20experiment_platform%3Aillumina&start=0&size=500
	> print(paste('ncol for batch', st, '=', ncol(df)))
	R version 3.5.0 (2018-04-23) -- "Joy in Playing"
	Copyright (C) 2018 The R Foundation for Statistical Computing
	Platform: x86_64-apple-darwin15.6.0 (64-bit)

	R is free software and comes with ABSOLUTELY NO WARRANTY.
	You are welcome to redistribute it under certain conditions.
	Type 'license()' or 'licence()' for distribution details.

	Natural language support but running in an English locale
	library('SRAdb')

	# If you have SRAmetadb already, set this directory, or setwd appropriately
	# to save yourself a large download
	sqlfile <- file.path('.', 'SRAmetadb.sqlite')

	# Check the same path that was just built rather than repeating the file name.
	# Plain `<-` replaces `<<-`: at top level both assign in the global
	# environment, and `<<-` is reserved for modifying an enclosing scope.
	if (!file.exists(sqlfile)) {
	  sqlfile <- getSRAdbFile()
	}
	sra_con <- dbConnect(SQLite(), sqlfile)

	# Run the SQL string `x` on `sra_con` and return what dbGetQuery() returns.
	# NOTE(review): this definition masks base::q() (quit) for the session.
	q <- function(x) {
	  dbGetQuery(sra_con, x)
	}

	species_to_tax_id = list(
	#!/usr/bin/env Rscript

	# source('http://bioconductor.org/biocLite.R')
	# biocLite('recount')
	# biocLite('GenomicRanges')
	# biocLite('LieberInstitute/recount.bwtool')

	library('GenomicRanges')
	library('recount')
	library('recount.bwtool')
	{
	"metadata": {
	"name": "",
	"signature": "sha256:0e059d6141a29e725dcfd5b05c7fa0fe7e20abcd0698fa34124a7c847f09627d"
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"metadata": {
	"name": ""
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{