Skip to content

Instantly share code, notes, and snippets.

@sminot
Last active April 30, 2016 00:52
Show Gist options
  • Save sminot/dd9ce06a7677bf8fd9b47e0e096d0a91 to your computer and use it in GitHub Desktop.
Save sminot/dd9ce06a7677bf8fd9b47e0e096d0a91 to your computer and use it in GitHub Desktop.
#
# Copyright Reference Genomics, Inc. 2016
# Released under the MIT License
#
# Script for fetching analysis results from the One Codex API (v0)
# See https://docs.onecodex.com for full documentation on the REST API
#
# Script can be run from the command line with:
# `Rscript fetch_ocx_analyses.R $ONE_CODEX_API_KEY $output_filepath`
#
library(httr)
library(plyr)
library(reshape2)
# 1. Get the list of all analyses for this account
#
# Note: This returns all metagenomic classification results
# for all samples and reference database permutations
ListAnalyses <- function(api_key) {
r <- GET(url = "https://app.onecodex.com/api/v0/analyses",
authenticate(api_key, ''))
return(content(r))
}
# 2. Function to fetch the results of a given analyses
# Returns a dataframe with columns including:
# name, rank, tax_id, readcount, and readcount_w_children
#
# Note: We use the /extended_table route here
# which provides a few extra fields and records,
# but is otherwise similar to the documented /table
# REST route (including entries for taxonomic nodes
# to which 0 reads map, e.g., family nodes, as well as
# species-level abundance estimates where available)
GetAnalysisResult <- function(api_key, analysis) {
url <- paste("https://app.onecodex.com/api/v0/analyses",
analysis$id,
"extended_table",
sep='/')
r <- GET(url = url,
authenticate(api_key, ''))
if (status_code(r) != 200) {
print(paste("Not able to fetch results for", analysis$sample_filename))
return(data.frame())
}
# Convert JSON content into a data.frame
r <- content(r)
r <- ldply(r, data.frame)
# Add the analysis$sample_filename name into the data.frame
r$sample_filename <- analysis$sample_filename
r$id <- analysis$id
r$sample_id <- analysis$sample_id
r$reference_name <- analysis$reference_name
r$reference_id <- analysis$reference_id
r$n_mapped_reads <- sum(r$readcount)
return(r)
}
# 3. Function to grab all analyses and combine them into a "wide"
# dataframe (optionally filtered by filename or ID and reference database
# name or ID)
GetAggregatedAnalyses <- function(api_key,
reference_dbs=c("One Codex Database"),
fetch_only=c()) {
analysis_list <- ListAnalyses(api_key)
analysis_results <- data.frame(stringsAsFactors=FALSE)
for (a in analysis_list) {
# Skip analyses for non-specified reference databases as appropriate
if (length(reference_dbs) > 0 &&
!(a$reference_name %in% reference_dbs) &&
!(a$reference_id %in% reference_dbs)) {
next
}
# Skip analyses not matching the filename/ID if specified in `fetch_only`
if(length(fetch_only) > 0 &&
!(a$sample_filename %in% fetch_only) &&
!(a$sample_id %in% fetch_only)) {
next
}
# Add fetched rows to analysis_results dataframe
result <- GetAnalysisResult(api_key, a)
print(paste("Fetched results for", a$sample_filename))
output_fields <- c('name', 'rank', 'readcount_w_children',
'tax_id', 'sample_filename', 'id',
'sample_id', 'reference_name', 'reference_id',
'n_mapped_reads')
result <- subset(result, select=output_fields)
analysis_results <- rbind(analysis_results, result)
}
if(nrow(analysis_results) == 0){
print("No results found, exiting.")
return()
}
# Reshape the results into a wide table
analysis_wide <- dcast(analysis_results,
name + rank + tax_id ~ sample_id,
value.var='readcount_w_children',
fun.aggregate = mean)
analysis_wide[is.na(analysis_wide)] <- 0
sample_ids <- names(analysis_wide)[4:length(names(analysis_wide))]
for(field in c('n_mapped_reads', 'reference_id', 'reference_name',
'id', 'sample_filename')){
field_wide <- data.frame(name=c(field),
rank=c(''),
tax_id=c(''),
stringsAsFactors=FALSE)
for(s in sample_ids){
field_wide[s] <- head(subset(analysis_results, sample_id == s), 1)[field]
}
analysis_wide <- rbind(field_wide, analysis_wide)
}
return(analysis_wide)
}
# The commands below allow this script to be run from the command line
# Usage: Rscript fetch_ocx_analyses.R $ONE_CODEX_API_KEY $output_filepath
args = commandArgs(trailingOnly=TRUE)
if (length(args) >= 2) {
api_key <- args[1]
output_fp <- args[2]
if(length(args) > 2){
fetch_only <- args[3:length(args)]
}else{fetch_only<-c()}
print(paste("Downloading analysis using API KEY",
api_key,
"and saving to",
output_fp))
analyses <- GetAggregatedAnalyses(api_key, fetch_only=fetch_only)
write.table(analyses, file=output_fp, sep=',', row.names=FALSE)
} else {
print("Usage: Rscript fetch_ocx_analyses.R $API_KEY $output_filepath")
print("")
print("Note: An optional list of sample IDs and filenames may also be ")
print("passed after $output_filepath, fetching only those results.")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment