Skip to content

Instantly share code, notes, and snippets.

@tiagochst
Last active November 9, 2023 12:56
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tiagochst/a701bad3fa3800ade7063760755e0aad to your computer and use it in GitHub Desktop.
Save tiagochst/a701bad3fa3800ade7063760755e0aad to your computer and use it in GitHub Desktop.
Using TCGAbiolinks with GDC (still in development)
# ------------------------------------------------------------------
# Updating TCGAbiolinks to work with GDC data
# ------------------------------------------------------------------
# Install the latest version from GitHub (this is a development version),
# then attach the package for the calls that follow.
devtools::install_github(repo = "BioinformaticsFMRP/TCGAbiolinks")
library("TCGAbiolinks")
####################### Working with harmonized data #######################
# Data.category: clinical and biospecimen
############################################################################
# Clinical information
# https://gdc.nci.nih.gov/about-data/data-harmonization-and-generation/clinical-data-harmonization
# The clinical and biospecimen tables are kept under separate names so that
# fetching one does not overwrite the other.
clin <- GDCquery_clinic("TCGA-ACC", type = "clinical", save.csv = TRUE)
biospecimen <- GDCquery_clinic("TCGA-ACC", type = "biospecimen", save.csv = TRUE)
#-----------------------------------------------------------------------------
# Data.category: MAF files
#-----------------------------------------------------------------------------
# Fetch the ACC mutation table and the clinical table, keep a few clinical
# columns as annotation, and pass both to the oncoprint plot.
mut <- GDCquery_Maf(tumor = "ACC")
clin <- GDCquery_clinic("TCGA-ACC", type = "clinical")
clin <- clin[, c(
  "bcr_patient_barcode",
  "disease",
  "gender",
  "tumor_stage",
  "race",
  "vital_status"
)]
TCGAvisualize_oncoprint(
  mut = mut,
  # Gene symbols taken from the first 20 entries of the mutation table
  genes = mut$Hugo_Symbol[seq_len(20)],
  filename = "onco.pdf",
  annotation = clin,
  color = c(
    "background" = "#CCCCCC",
    "DEL" = "purple",
    "INS" = "yellow",
    "SNP" = "brown"
  ),
  rows.font.size = 10,
  heatmap.legend.side = "right",
  dist.col = 0,
  label.font.size = 10
)
#-----------------------------------------------------------------------------
# Data.category: Copy number variation
#-----------------------------------------------------------------------------
# Copy number segments for two explicitly listed barcodes
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy Number Variation",
  data.type = "Copy Number Segment",
  barcode = c("TCGA-OR-A5KU-01A-11D-A29H-01", "TCGA-OR-A5JK-01A-11D-A29H-01")
)
GDCdownload(query)
data <- GDCprepare(query)

# Masked copy number segments, selected by sample type instead of barcode.
# The matched cases can be inspected with query$results[[1]]$cases
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy Number Variation",
  data.type = "Masked Copy Number Segment",
  sample.type = "Primary solid Tumor"
)
GDCdownload(query)
data <- GDCprepare(query)
#-----------------------------------------------------------------------------
# Data.category: Transcriptome Profiling
#-----------------------------------------------------------------------------
# Run the same two-barcode gene expression query once per workflow type.
# `query` and `data` are overwritten on every pass, so only the last
# workflow's result remains after the loop.
workflow.type <- c("HTSeq - Counts", "HTSeq - FPKM", "HTSeq - FPKM-UQ")
for (workflow in workflow.type) {
  print(workflow)
  query <- GDCquery(
    project = "TARGET-AML",
    data.category = "Transcriptome Profiling",
    data.type = "Gene Expression Quantification",
    workflow.type = workflow,
    barcode = c("TARGET-20-PADZCG-04A-01R", "TARGET-20-PARJCR-09A-01R")
  )
  GDCdownload(query)
  data <- GDCprepare(query)
}
# Alternative that also covers isoform quantification (currently disabled):
#data.type <- c("miRNA Expression Quantification","Isoform Expression Quantification")
data.type <- c("miRNA Expression Quantification")
# Query, download and prepare each listed data type for two barcodes,
# printing the first rows of every prepared result.
for (mirna_type in data.type) {
  print(mirna_type)
  query <- GDCquery(
    project = "TARGET-AML",
    data.category = "Transcriptome Profiling",
    data.type = mirna_type,
    workflow.type = "BCGSC miRNA Profiling",
    barcode = c("TARGET-20-PARUDL-03A-01R", "TARGET-20-PASRRB-03A-01R")
  )
  GDCdownload(query)
  data <- GDCprepare(query)
  print(head(data))
}
####################### Working with Legacy data ###########################
# Data.category: Copy number variation
############################################################################
# Each section below queries, downloads and prepares one legacy file type.
# `query` and `z` are reused, so only the last section's values remain.
# NOTE(review): every query here depends on `legacy = TRUE` being accepted by
# the installed TCGAbiolinks and served by GDC - confirm before relying on it.

# nocnv_hg19.seg
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  file.type = "nocnv_hg19.seg",
  barcode = c("TCGA-OR-A5LR-01A-11D-A29H-01", "TCGA-OR-A5LJ-10A-01D-A29K-01"),
  legacy = TRUE
)
GDCdownload(query)
z <- GDCprepare(query)

# nocnv_hg18.seg
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  file.type = "nocnv_hg18.seg",
  barcode = c("TCGA-OR-A5LR-01A-11D-A29H-01", "TCGA-OR-A5LJ-10A-01D-A29K-01"),
  legacy = TRUE
)
GDCdownload(query)
z <- GDCprepare(query)

# hg19.seg
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  file.type = "hg19.seg",
  barcode = c("TCGA-OR-A5LR-01A-11D-A29H-01", "TCGA-OR-A5LJ-10A-01D-A29K-01"),
  legacy = TRUE
)
GDCdownload(query)
z <- GDCprepare(query)

# TCGA-LGG, selected by platform rather than file type
query <- GDCquery(
  project = "TCGA-LGG",
  data.category = "Copy number variation",
  platform = "Illumina HiSeq",
  barcode = c("TCGA-HT-7476-10A-01D-2022-02", "TCGA-FG-6689-01A-11D-1891-02"),
  legacy = TRUE
)
GDCdownload(query)
z <- GDCprepare(query)

# hg18.seg
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  file.type = "hg18.seg",
  barcode = c("TCGA-OR-A5LR-01A-11D-A29H-01", "TCGA-OR-A5LJ-10A-01D-A29K-01"),
  legacy = TRUE
)
GDCdownload(query)
z <- GDCprepare(query)
####################### Working with Legacy data ###########################
# Data.category: DNA methylation & Protein expression
############################################################################
# Download and prepare a small legacy sample for one project/platform.
#
# Runs an unrestricted legacy query to discover the available cases, then
# repeats the query restricted to at most the first two of them, downloads
# the matching files and prepares them.
#
# Arguments:
#   project        project identifier forwarded to GDCquery(), e.g. "TCGA-GBM"
#   data.category  data category forwarded to GDCquery()
#   platform       platform name forwarded to GDCquery()
# Returns:
#   whatever GDCprepare() returns for the restricted query.
# Errors:
#   stops if the discovery query lists no cases.
legacyPipeline <- function(project, data.category, platform) {
  query <- GDCquery(
    project = project,
    data.category = data.category,
    platform = platform,
    legacy = TRUE
  )

  # head() rather than [1:2]: indexing past the end would pad the barcode
  # vector with NA when fewer than two cases are available.
  cases <- utils::head(query$results[[1]]$cases, 2)
  if (length(cases) == 0) {
    stop(
      "No cases found for project '", project,
      "', data category '", data.category,
      "', platform '", platform, "'",
      call. = FALSE
    )
  }

  query <- GDCquery(
    project = project,
    data.category = data.category,
    platform = platform,
    legacy = TRUE,
    barcode = cases
  )
  GDCdownload(query)
  GDCprepare(query)
}
# DNA methylation: run the two-sample pipeline once per legacy platform.
# `data` is overwritten by each call.
data <- legacyPipeline("TCGA-GBM", "DNA methylation", "Illumina Human Methylation 27")
data <- legacyPipeline("TCGA-GBM", "DNA methylation", "Illumina Human Methylation 450")
data <- legacyPipeline("TCGA-GBM", "DNA methylation", "Illumina DNA Methylation OMA003 CPI")
data <- legacyPipeline("TCGA-GBM", "DNA methylation", "Illumina DNA Methylation OMA002 CPI")
# Protein expression
data <- legacyPipeline("TCGA-GBM", "Protein expression", "MDA_RPPA_Core")
@tiagochst
Copy link
Author

tiagochst commented Aug 29, 2019 via email

@behnazhoseyni
Copy link

yes.

@behnazhoseyni
Copy link

behnazhoseyni commented Aug 29, 2019

Please check this page:
http://www.bioconductor.org/packages/devel/bioc/vignettes/TCGAbiolinks/inst/doc/download_prepare.html
I found this sentence about 'Harmonized data':
"This function is still under development, it is not working for all cases."
Is that the reason the code above doesn't work?

@tiagochst
Copy link
Author

It is working here:

Screenshot from 2019-08-29 15-28-25

@behnazhoseyni
Copy link

Oh, I'm amazed.
I will try to find out why it isn't working.
Thank you so much.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment