Skip to content

Instantly share code, notes, and snippets.

@tiagochst
Last active April 21, 2017 19:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tiagochst/78dee5db43fb6836df4d46d9339ca450 to your computer and use it in GitHub Desktop.
Save tiagochst/78dee5db43fb6836df4d46d9339ca450 to your computer and use it in GitHub Desktop.
# This code will get all clinical indexed data from TCGA
library(TCGAbiolinks)
library(data.table)
# List every GDC project id, keep the ones containing "TCGA" and sort them.
# Named intermediates are used instead of `%>%`, so this step does not need a
# pipe operator to be attached.
tcga_projects <- sort(
  grep("TCGA", TCGAbiolinks:::getGDCprojects()$project_id, value = TRUE)
)
# Call GDCquery_clinic() once per project, showing a text progress bar.
clinical_by_project <- plyr::alply(
  tcga_projects, 1, GDCquery_clinic,
  .progress = "text"
)
# Stack the per-project tables into a single table.
clinical <- data.table::rbindlist(clinical_by_project)
# The output file is passed positionally: readr calls this argument `path` in
# older releases and `file` in newer ones (where `path` is deprecated).
readr::write_csv(clinical, "all_clin_indexed.csv")
# This code will get all clinical XML data from TCGA
# Download and assemble the clinical XML data of one project.
#
# Queries the "Clinical" data category of `proj`, downloads it, parses the
# "patient" table and merges the "admin", "radiation", "follow_up", "drug"
# and "new_tumor_event" tables onto it by `bcr_patient_barcode`, keeping
# unmatched rows from both sides (all = TRUE). The merged table is also
# written to "<proj>_clinical_from_XML.csv".
#
# Any error raised during an attempt is reported with message(), including
# the underlying error text, and the whole attempt is repeated.
#
# Args:
#   proj: project id passed to GDCquery(project = ...).
#   max_tries: maximum number of attempts. The default, Inf, keeps the
#     original behaviour of retrying until an attempt succeeds.
#
# Returns:
#   The merged clinical table, or NULL (with a warning) when all `max_tries`
#   attempts failed.
getclinical <- function(proj, max_tries = Inf) {
  message(proj)
  attempt <- 0
  while (attempt < max_tries) {
    attempt <- attempt + 1
    outcome <- tryCatch({
      query <- GDCquery(project = proj, data.category = "Clinical")
      GDCdownload(query)
      clinical <- GDCprepare_clinic(query, clinical.info = "patient")
      for (i in c("admin", "radiation", "follow_up", "drug", "new_tumor_event")) {
        message(i)
        aux <- GDCprepare_clinic(query, clinical.info = i)
        if (is.null(aux)) next
        # Add the table name as a suffix to every non-key column that already
        # exists in `clinical`. The mask is computed over the full column
        # vector so that its positions line up with colnames(aux); indexing
        # with positions taken from a key-less copy would shift them and
        # could rename the merge key itself.
        aux_cols <- colnames(aux)
        replicated <- !grepl("bcr_patient_barcode", aux_cols) &
          aux_cols %in% colnames(clinical)
        colnames(aux)[replicated] <- paste0(aux_cols[replicated], ".", i)
        clinical <- merge(clinical, aux, by = "bcr_patient_barcode", all = TRUE)
      }
      # Save the clinical data into a csv file. The file is passed
      # positionally because readr renamed this argument from `path` to
      # `file`.
      readr::write_csv(clinical, paste0(proj, "_clinical_from_XML.csv"))
      # Wrap the result so that success can be told apart from a failed
      # attempt, for which the handler below returns NULL.
      list(value = clinical)
    }, error = function(e) {
      message(paste0("Error clinical: ", proj, " - ", conditionMessage(e)))
      NULL
    })
    if (!is.null(outcome)) {
      return(outcome$value)
    }
  }
  warning(
    "Giving up on clinical data for ", proj, " after ", attempt, " attempt(s)",
    call. = FALSE
  )
  NULL
}
# List every GDC project id, keep the ones containing "TCGA" and sort them.
# Named intermediates are used instead of `%>%`, so this step does not need a
# pipe operator to be attached.
tcga_projects <- sort(
  grep("TCGA", TCGAbiolinks:::getGDCprojects()$project_id, value = TRUE)
)
# Build the XML-derived clinical table of each project (one CSV per project
# is written by getclinical() along the way).
clinical_by_project <- plyr::alply(
  tcga_projects, 1, getclinical,
  .progress = "text"
)
# Stack the per-project tables; fill = TRUE pads columns that are missing in
# some projects with NA. setDF() turns the result into a plain data.frame.
clinical <- data.table::setDF(
  data.table::rbindlist(clinical_by_project, fill = TRUE)
)
# The output file is passed positionally: readr calls this argument `path` in
# older releases and `file` in newer ones (where `path` is deprecated).
readr::write_csv(clinical, "all_clin_XML.csv")
# Get all batch numbers for each patient
library(TCGAbiolinks)
# Download the biospecimen XML data of one project and extract its "admin"
# table.
#
# Queries the "Biospecimen" data category of `proj`, downloads it and parses
# it with GDCprepare_clinic(clinical.info = "admin"). The table is also
# written to "<proj>_batch_from_XML.csv".
#
# Any error raised during an attempt is reported with message(), including
# the underlying error text, and the whole attempt is repeated.
#
# Args:
#   proj: project id passed to GDCquery(project = ...).
#   max_tries: maximum number of attempts. The default, Inf, keeps the
#     original behaviour of retrying until an attempt succeeds.
#
# Returns:
#   The parsed "admin" table, or NULL (with a warning) when all `max_tries`
#   attempts failed.
getBatch <- function(proj, max_tries = Inf) {
  message(proj)
  attempt <- 0
  while (attempt < max_tries) {
    attempt <- attempt + 1
    outcome <- tryCatch({
      query <- GDCquery(project = proj, data.category = "Biospecimen")
      GDCdownload(query)
      batch <- GDCprepare_clinic(query, clinical.info = "admin")
      # Save the batch data into a csv file. The file is passed positionally
      # because readr renamed this argument from `path` to `file`.
      readr::write_csv(batch, paste0(proj, "_batch_from_XML.csv"))
      # Wrap the result so that success can be told apart from a failed
      # attempt, for which the handler below returns NULL.
      list(value = batch)
    }, error = function(e) {
      message(paste0("Error biospecimen: ", proj, " - ", conditionMessage(e)))
      NULL
    })
    if (!is.null(outcome)) {
      return(outcome$value)
    }
  }
  warning(
    "Giving up on biospecimen data for ", proj, " after ", attempt,
    " attempt(s)",
    call. = FALSE
  )
  NULL
}
# List every GDC project id, keep the ones containing "TCGA" and sort them.
# Named intermediates are used instead of `%>%`, so this step does not need a
# pipe operator to be attached.
tcga_projects <- sort(
  grep("TCGA", TCGAbiolinks:::getGDCprojects()$project_id, value = TRUE)
)
# Build the batch table of each project (one CSV per project is written by
# getBatch() along the way).
batch_by_project <- plyr::alply(
  tcga_projects, 1, getBatch,
  .progress = "text"
)
# Stack the per-project tables; fill = TRUE pads columns that are missing in
# some projects with NA. setDF() turns the result into a plain data.frame.
# Both calls are namespace-qualified so this section does not require
# data.table to be attached.
biospecimen <- data.table::setDF(
  data.table::rbindlist(batch_by_project, fill = TRUE)
)
# The output file is passed positionally: readr calls this argument `path` in
# older releases and `file` in newer ones (where `path` is deprecated).
readr::write_csv(biospecimen, "biospecimen_from_XML.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment