Last active
April 30, 2016 00:52
-
-
Save sminot/dd9ce06a7677bf8fd9b47e0e096d0a91 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Copyright Reference Genomics, Inc. 2016 | |
# Released under the MIT License | |
# | |
# Script for fetching analysis results from the One Codex API (v0) | |
# See https://docs.onecodex.com for full documentation on the REST API | |
# | |
# Script can be run from the command line with: | |
# `Rscript fetch_ocx_analyses.R $ONE_CODEX_API_KEY $output_filepath` | |
# | |
library(httr) | |
library(plyr) | |
library(reshape2) | |
# 1. Get the list of all analyses for this account | |
# | |
# Note: This returns all metagenomic classification results | |
# for all samples and reference database permutations | |
ListAnalyses <- function(api_key) { | |
r <- GET(url = "https://app.onecodex.com/api/v0/analyses", | |
authenticate(api_key, '')) | |
return(content(r)) | |
} | |
# 2. Function to fetch the results of a given analyses | |
# Returns a dataframe with columns including: | |
# name, rank, tax_id, readcount, and readcount_w_children | |
# | |
# Note: We use the /extended_table route here | |
# which provides a few extra fields and records, | |
# but is otherwise similar to the documented /table | |
# REST route (including entries for taxonomic nodes | |
# to which 0 reads map, e.g., family nodes, as well as | |
# species-level abundance estimates where available) | |
GetAnalysisResult <- function(api_key, analysis) { | |
url <- paste("https://app.onecodex.com/api/v0/analyses", | |
analysis$id, | |
"extended_table", | |
sep='/') | |
r <- GET(url = url, | |
authenticate(api_key, '')) | |
if (status_code(r) != 200) { | |
print(paste("Not able to fetch results for", analysis$sample_filename)) | |
return(data.frame()) | |
} | |
# Convert JSON content into a data.frame | |
r <- content(r) | |
r <- ldply(r, data.frame) | |
# Add the analysis$sample_filename name into the data.frame | |
r$sample_filename <- analysis$sample_filename | |
r$id <- analysis$id | |
r$sample_id <- analysis$sample_id | |
r$reference_name <- analysis$reference_name | |
r$reference_id <- analysis$reference_id | |
r$n_mapped_reads <- sum(r$readcount) | |
return(r) | |
} | |
# 3. Function to grab all analyses and combine them into a "wide" | |
# dataframe (optionally filtered by filename or ID and reference database | |
# name or ID) | |
GetAggregatedAnalyses <- function(api_key, | |
reference_dbs=c("One Codex Database"), | |
fetch_only=c()) { | |
analysis_list <- ListAnalyses(api_key) | |
analysis_results <- data.frame(stringsAsFactors=FALSE) | |
for (a in analysis_list) { | |
# Skip analyses for non-specified reference databases as appropriate | |
if (length(reference_dbs) > 0 && | |
!(a$reference_name %in% reference_dbs) && | |
!(a$reference_id %in% reference_dbs)) { | |
next | |
} | |
# Skip analyses not matching the filename/ID if specified in `fetch_only` | |
if(length(fetch_only) > 0 && | |
!(a$sample_filename %in% fetch_only) && | |
!(a$sample_id %in% fetch_only)) { | |
next | |
} | |
# Add fetched rows to analysis_results dataframe | |
result <- GetAnalysisResult(api_key, a) | |
print(paste("Fetched results for", a$sample_filename)) | |
output_fields <- c('name', 'rank', 'readcount_w_children', | |
'tax_id', 'sample_filename', 'id', | |
'sample_id', 'reference_name', 'reference_id', | |
'n_mapped_reads') | |
result <- subset(result, select=output_fields) | |
analysis_results <- rbind(analysis_results, result) | |
} | |
if(nrow(analysis_results) == 0){ | |
print("No results found, exiting.") | |
return() | |
} | |
# Reshape the results into a wide table | |
analysis_wide <- dcast(analysis_results, | |
name + rank + tax_id ~ sample_id, | |
value.var='readcount_w_children', | |
fun.aggregate = mean) | |
analysis_wide[is.na(analysis_wide)] <- 0 | |
sample_ids <- names(analysis_wide)[4:length(names(analysis_wide))] | |
for(field in c('n_mapped_reads', 'reference_id', 'reference_name', | |
'id', 'sample_filename')){ | |
field_wide <- data.frame(name=c(field), | |
rank=c(''), | |
tax_id=c(''), | |
stringsAsFactors=FALSE) | |
for(s in sample_ids){ | |
field_wide[s] <- head(subset(analysis_results, sample_id == s), 1)[field] | |
} | |
analysis_wide <- rbind(field_wide, analysis_wide) | |
} | |
return(analysis_wide) | |
} | |
# The commands below allow this script to be run from the command line | |
# Usage: Rscript fetch_ocx_analyses.R $ONE_CODEX_API_KEY $output_filepath | |
args = commandArgs(trailingOnly=TRUE) | |
if (length(args) >= 2) { | |
api_key <- args[1] | |
output_fp <- args[2] | |
if(length(args) > 2){ | |
fetch_only <- args[3:length(args)] | |
}else{fetch_only<-c()} | |
print(paste("Downloading analysis using API KEY", | |
api_key, | |
"and saving to", | |
output_fp)) | |
analyses <- GetAggregatedAnalyses(api_key, fetch_only=fetch_only) | |
write.table(analyses, file=output_fp, sep=',', row.names=FALSE) | |
} else { | |
print("Usage: Rscript fetch_ocx_analyses.R $API_KEY $output_filepath") | |
print("") | |
print("Note: An optional list of sample IDs and filenames may also be ") | |
print("passed after $output_filepath, fetching only those results.") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment