Skip to content

Instantly share code, notes, and snippets.

@BenLangmead
Created November 22, 2017 19:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BenLangmead/5ca5815b5c444b1bf183defefb3e26a5 to your computer and use it in GitHub Desktop.
Save BenLangmead/5ca5815b5c444b1bf183defefb3e26a5 to your computer and use it in GitHub Desktop.
sradb RNA-seq samps by species
library('SRAdb')
# Locate the SRAmetadb SQLite file and open a connection to it.
# If you already have SRAmetadb.sqlite, run this script from the directory
# that contains it (or edit the path below) to save yourself a large download.
sqlfile <- file.path(".", "SRAmetadb.sqlite")
if (!file.exists(sqlfile)) {
  # Not found locally: fall back to getSRAdbFile() and use the path it returns.
  sqlfile <- getSRAdbFile()
}
# Connection used by every query issued later in this script.
sra_con <- dbConnect(SQLite(), sqlfile)
# Run one SQL statement against the open SRAmetadb connection (sra_con) and
# return whatever dbGetQuery() returns for it.
# NOTE: this name masks base::q() (quit) for the rest of the session.
q <- function(x) {
  dbGetQuery(conn = sra_con, statement = x)
}
# Species name -> NCBI taxonomy id for the organisms queried below.
# The trailing number on each line is the species' rank in the taxon-count
# table pasted in the comment that follows this list.
species_to_tax_id <- list(
  "arabidopsis_thaliana"     = 3702,   # 4: arabidopsis
  "bos_taurus"               = 9913,   # 9: cow
  "caenorhabditis_elegans"   = 6239,   # >10: roundworm
  "danio_rerio"              = 7955,   # 3: zebrafish
  "drosophila_melanogaster"  = 7227,   # 5: fruitfly
  "homo_sapiens"             = 9606,   # 2: human
  "mus_musculus"             = 10090,  # 1: mouse
  "ovis_aries"               = 9940,   # 10: sheep
  "rattus_norvegicus"        = 10116,  # 7: rat
  "saccharomyces_cerevisiae" = 4932,   # 6: yeast
  "zea_mays"                 = 4577    # 8: corn
)
# > sort(table(taxids),decreasing=TRUE)[1:10]
# taxids
# 10090 9606 7955 3702 7227 4932 10116 4577 9913 9940
# 195878 156394 23590 17296 17006 7997 7098 7018 5173 5018
# Fetch every Illumina, transcriptomic RNA-seq run recorded for one taxon.
#
# taxid: a single whole-number NCBI taxonomy id (e.g. 9606).
#
# Builds one SELECT over the sra, study, submission, fastq and run tables,
# tied together by the accession equalities in the WHERE clause, and hands it
# to q(). The return value is whatever q() returns for that statement.
species_table <- function(taxid) {
  # sprintf() is vectorised, so a longer taxid would silently turn into
  # several statements; insist on exactly one id up front.
  stopifnot(is.numeric(taxid), length(taxid) == 1L)
  # String literals use single quotes: SQLite only accepts double-quoted
  # strings through a legacy fallback that can be switched off at build time.
  sql <- paste(
    "SELECT * FROM sra, study, submission, fastq, run",
    "WHERE sra.platform = 'ILLUMINA'",
    "AND sra.library_strategy = 'RNA-Seq'",
    "AND sra.library_source = 'TRANSCRIPTOMIC'",
    "AND sra.submission_accession = submission.submission_accession",
    "AND sra.study_accession = study.study_accession",
    "AND sra.run_accession = fastq.run_accession",
    "AND sra.run_accession = run.run_accession",
    # presumably FASTQ_FILES is the number of FASTQ files for the run -- confirm
    "AND fastq.FASTQ_FILES > 0",
    sprintf("AND sra.taxon_id = %d", taxid)
  )
  q(sql)
}
# One query result per species, in a list that keeps the names of
# species_to_tax_id.
runs_by_species <- lapply(X = species_to_tax_id, FUN = species_table)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment