Last active
November 9, 2023 12:56
-
-
Save tiagochst/a701bad3fa3800ade7063760755e0aad to your computer and use it in GitHub Desktop.
Using TCGAbiolinks with GDC (still in development)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ------------------------------------------------------------------
# Updating TCGAbiolinks to work with GDC data
# ------------------------------------------------------------------
# Install the latest version from GitHub (this is a development version)
devtools::install_github("BioinformaticsFMRP/TCGAbiolinks")
library(TCGAbiolinks)
####################### Working harmonized data ###########################
# Data.category: clinical and biospecimen
############################################################################
# Clinical information
# https://gdc.nci.nih.gov/about-data/data-harmonization-and-generation/clinical-data-harmonization
# Both calls pass save.csv = TRUE in addition to returning the table.
clin <- GDCquery_clinic("TCGA-ACC", type = "clinical", save.csv = TRUE)
# The biospecimen table gets its own variable so that it does not overwrite
# the clinical table retrieved on the previous line.
biospecimen <- GDCquery_clinic("TCGA-ACC", type = "biospecimen", save.csv = TRUE)
#-----------------------------------------------------------------------------
# Data.category: MAF files
#-----------------------------------------------------------------------------
mut <- GDCquery_Maf(tumor = "ACC")
clin <- GDCquery_clinic("TCGA-ACC", "clinical")
# Keep only the clinical columns that are passed to the oncoprint as
# annotation.
clin <- clin[, c(
  "bcr_patient_barcode", "disease", "gender",
  "tumor_stage", "race", "vital_status"
)]
# `genes` is the first 20 entries of the Hugo_Symbol column as they appear in
# the MAF table (not a ranking, and entries may repeat).
TCGAvisualize_oncoprint(
  mut = mut,
  genes = mut$Hugo_Symbol[1:20],
  filename = "onco.pdf",
  annotation = clin,
  color = c(
    "background" = "#CCCCCC",
    "DEL" = "purple",
    "INS" = "yellow",
    "SNP" = "brown"
  ),
  rows.font.size = 10,
  heatmap.legend.side = "right",
  dist.col = 0,
  label.font.size = 10
)
#-----------------------------------------------------------------------------
# Data.category: Copy number variation
#-----------------------------------------------------------------------------
# Copy number segments for two explicitly selected barcodes.
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy Number Variation",
  data.type = "Copy Number Segment",
  barcode = c("TCGA-OR-A5KU-01A-11D-A29H-01", "TCGA-OR-A5JK-01A-11D-A29H-01")
)
GDCdownload(query)
data <- GDCprepare(query)

# Masked copy number segments, selected by sample type instead of barcode.
# The matched cases can be inspected with query$results[[1]]$cases.
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy Number Variation",
  data.type = "Masked Copy Number Segment",
  sample.type = c("Primary solid Tumor")
)
GDCdownload(query)
data <- GDCprepare(query)
#-----------------------------------------------------------------------------
# Data.category: Transcriptome Profiling
#-----------------------------------------------------------------------------
# Run the query/download/prepare cycle once per gene expression workflow.
# `data` is overwritten on every iteration, so only the result of the last
# workflow remains after the loop.
workflow.type <- c("HTSeq - Counts", "HTSeq - FPKM", "HTSeq - FPKM-UQ")
for (workflow in workflow.type) {
  print(workflow)
  query <- GDCquery(
    project = "TARGET-AML",
    data.category = "Transcriptome Profiling",
    data.type = "Gene Expression Quantification",
    workflow.type = workflow,
    barcode = c("TARGET-20-PADZCG-04A-01R", "TARGET-20-PARJCR-09A-01R")
  )
  GDCdownload(query)
  data <- GDCprepare(query)
}
# Only the miRNA quantification is exercised; the variant that also included
# the isoform quantification is kept below for reference.
#data.type <- c("miRNA Expression Quantification","Isoform Expression Quantification")
data.type <- c("miRNA Expression Quantification")
for (type in data.type) {
  print(type)
  query <- GDCquery(
    project = "TARGET-AML",
    data.category = "Transcriptome Profiling",
    data.type = type,
    workflow.type = "BCGSC miRNA Profiling",
    barcode = c("TARGET-20-PARUDL-03A-01R", "TARGET-20-PASRRB-03A-01R")
  )
  GDCdownload(query)
  data <- GDCprepare(query)
  print(head(data))
}
####################### Working with Legacy data ###########################
# Data.category: Copy number variation
############################################################################
# Each query below is downloaded and prepared in turn; `z` is overwritten
# every time, so it ends up holding the result of the last query.

# TCGA-ACC, file.type = "nocnv_hg19.seg"
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  legacy = TRUE,
  file.type = "nocnv_hg19.seg",
  barcode = c("TCGA-OR-A5LR-01A-11D-A29H-01", "TCGA-OR-A5LJ-10A-01D-A29K-01")
)
GDCdownload(query)
z <- GDCprepare(query)

# TCGA-ACC, file.type = "nocnv_hg18.seg"
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  legacy = TRUE,
  file.type = "nocnv_hg18.seg",
  barcode = c("TCGA-OR-A5LR-01A-11D-A29H-01", "TCGA-OR-A5LJ-10A-01D-A29K-01")
)
GDCdownload(query)
z <- GDCprepare(query)

# TCGA-ACC, file.type = "hg19.seg"
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  legacy = TRUE,
  file.type = "hg19.seg",
  barcode = c("TCGA-OR-A5LR-01A-11D-A29H-01", "TCGA-OR-A5LJ-10A-01D-A29K-01")
)
GDCdownload(query)
z <- GDCprepare(query)

# TCGA-LGG, selected by platform instead of file.type
query <- GDCquery(
  project = "TCGA-LGG",
  barcode = c("TCGA-HT-7476-10A-01D-2022-02", "TCGA-FG-6689-01A-11D-1891-02"),
  data.category = "Copy number variation",
  platform = "Illumina HiSeq",
  legacy = TRUE
)
GDCdownload(query)
z <- GDCprepare(query)

# TCGA-ACC, file.type = "hg18.seg"
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  legacy = TRUE,
  file.type = "hg18.seg",
  barcode = c("TCGA-OR-A5LR-01A-11D-A29H-01", "TCGA-OR-A5LJ-10A-01D-A29K-01")
)
GDCdownload(query)
z <- GDCprepare(query)
####################### Working with Legacy data ###########################
# Data.category: DNA methylation & Protein expression
############################################################################
# Query, download and prepare legacy data for at most two cases.
#
# A first legacy query without a barcode filter is used only to find out
# which cases exist for the project / data category / platform combination.
# Up to two of them are then queried again by barcode, downloaded and
# prepared, which keeps the download small enough for a quick test.
#
# Arguments:
#   project        passed to GDCquery() as `project`
#   data.category  passed to GDCquery() as `data.category`
#   platform       passed to GDCquery() as `platform`
#
# Returns the value of GDCprepare() for the reduced query. Stops with an
# error when the first query reports no cases.
legacyPipeline <- function(project, data.category, platform) {
  # Discovery query: no barcode restriction.
  query <- GDCquery(
    project = project,
    data.category = data.category,
    platform = platform,
    legacy = TRUE
  )

  # head() rather than [1:2]: with fewer than two available cases, [1:2]
  # would pad the selection with NA and pass NA barcodes to the next query.
  cases <- head(query$results[[1]]$cases, 2)
  if (length(cases) == 0) {
    stop("No cases found for the requested project, data category and platform",
         call. = FALSE)
  }

  # Same query, restricted to the selected cases.
  query <- GDCquery(
    project = project,
    data.category = data.category,
    platform = platform,
    legacy = TRUE,
    barcode = cases
  )
  GDCdownload(query)
  GDCprepare(query)
}
# DNA methylation
# Each platform is requested exactly once. `data` is overwritten on every
# call, so it holds the result of the most recent one.
methylation.platforms <- c(
  "Illumina Human Methylation 27",
  "Illumina Human Methylation 450",
  "Illumina DNA Methylation OMA003 CPI",
  "Illumina DNA Methylation OMA002 CPI"
)
for (platform in methylation.platforms) {
  data <- legacyPipeline("TCGA-GBM", "DNA methylation", platform)
}
# Protein expression
data <- legacyPipeline("TCGA-GBM", "Protein expression", "MDA_RPPA_Core")
Please check this page:
http://www.bioconductor.org/packages/devel/bioc/vignettes/TCGAbiolinks/inst/doc/download_prepare.html
I found this sentence about 'Harmonized data':
"This function is still under development, it is not working for all cases."
Is that the reason the code above doesn't work?
Oh, I'm amazed.
I was trying to find out why it doesn't work.
Thank you so much.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
yes.