Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save dlaehnemann/97fa14cd8a397080ed5751383ee66933 to your computer and use it in GitHub Desktop.
Save dlaehnemann/97fa14cd8a397080ed5751383ee66933 to your computer and use it in GitHub Desktop.
R example: look up tumor purity for a case via the GDC (Genomic Data Commons) HTTP API
library("tidyverse")
library("httr")
#' Look up a tumor purity value for a single case via the GDC API.
#'
#' Requests each candidate purity field in turn from
#' `https://api.gdc.cancer.gov/cases/<case_hash>` (TSV format) and stops at the
#' first request whose parsed response has exactly one element.
#'
#' @param case_hash Case identifier appended to the `cases/` endpoint.
#' @return The first element of the first single-element response, or `NA`
#'   when no field yields one. Requests that fail (timeout, connection error)
#'   or come back with an HTTP error status count as "no result" for that
#'   field instead of aborting the lookup.
try_fields <- function(case_hash) {
  print(str_c("Now on case with hash:", case_hash, sep = " "))
  # all fields that can contain a tumor purity value
  fields <- c(
    "files.cases.samples.portions.slides.percent_tumor_cells",
    "files.cases.samples.portions.slides.percent_tumor_nuclei",
    "samples.portions.slides.percent_tumor_cells",
    "samples.portions.slides.percent_tumor_nuclei"
  )
  for (field in fields) {
    url <- str_c(
      "https://api.gdc.cancer.gov/cases/",
      case_hash,
      # here, "?expand=" is a good alternative to examine all entries at a
      # particular level, e.g. "samples.portions.slides" gives all info for
      # all slides
      "?fields=",
      field,
      # using "&format=JSON" probably allows for working with the results
      # more systematically, as TSV simply flattens all info into separate
      # columns
      "&format=TSV",
      collapse = ""
    )
    print(str_c("Now trying url:", url, sep = " "))
    # A timeout or connection failure on one request must not abort a whole
    # batch of lookups, so it is downgraded to a warning and treated as
    # "nothing found for this field".
    response <- tryCatch(
      {
        reply <- GET(url = url, timeout(7))
        # An error reply can carry a one-element body (e.g. just an error
        # message); without this check it would be mistaken for a purity.
        if (http_error(reply)) NULL else content(reply)
      },
      error = function(e) {
        warning(
          "Request failed for ", url, ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
    if (length(response) == 1) {
      print(str_c("found purity:", response[[1]], sep = " "))
      return(response[[1]])
    }
  }
  NA
}
#' Look up tumor purity for a vector of case hashes.
#'
#' Applies `try_fields()` to every element of `case_hashes` via `sapply()`,
#' which makes this function usable inside a `dplyr::mutate()` statement.
#'
#' @param case_hashes Vector of case hashes, each passed to `try_fields()`.
#' @return The `sapply()` result of the per-case lookups.
get_purity <- function(case_hashes) {
  purities <- sapply(X = case_hashes, FUN = try_fields)
  purities
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment