-
-
Save tiagochst/a701bad3fa3800ade7063760755e0aad to your computer and use it in GitHub Desktop.
# ------------------------------------------------------------------
# Updating TCGAbiolinks to work with GDC data
# ------------------------------------------------------------------
# Install the latest version from GitHub (this is a development version)
devtools::install_github("BioinformaticsFMRP/TCGAbiolinks")
library(TCGAbiolinks)

####################### Working harmonized data ###########################
# Data.category: clinical and biospecimen
############################################################################
# Clinical information
# https://gdc.nci.nih.gov/about-data/data-harmonization-and-generation/clinical-data-harmonization
# Both queries are issued with save.csv = TRUE.
clin <- GDCquery_clinic("TCGA-ACC", type = "clinical", save.csv = TRUE)
# Store the biospecimen table under its own name so it does not overwrite the
# clinical table fetched on the previous line.
biospecimen <- GDCquery_clinic("TCGA-ACC", type = "biospecimen", save.csv = TRUE)
#-----------------------------------------------------------------------------
# Data.category: MAF files
#-----------------------------------------------------------------------------
# Fetch the ACC mutation calls and the clinical table; a subset of the
# clinical columns is passed to the oncoprint as its annotation.
mut <- GDCquery_Maf(tumor = "ACC")
clin <- GDCquery_clinic("TCGA-ACC", "clinical")

annotation.cols <- c(
  "bcr_patient_barcode", "disease", "gender",
  "tumor_stage", "race", "vital_status"
)
clin <- clin[, annotation.cols]

# Genes to plot: the symbols found in the first 20 rows of the MAF table.
top.genes <- mut$Hugo_Symbol[1:20]

# Colours keyed by background and by the DEL / INS / SNP labels.
mutation.colors <- c(
  "background" = "#CCCCCC",
  "DEL" = "purple",
  "INS" = "yellow",
  "SNP" = "brown"
)

TCGAvisualize_oncoprint(
  mut = mut,
  genes = top.genes,
  filename = "onco.pdf",
  annotation = clin,
  color = mutation.colors,
  rows.font.size = 10,
  heatmap.legend.side = "right",
  dist.col = 0,
  label.font.size = 10
)
#-----------------------------------------------------------------------------
# Data.category: Copy number variation
#-----------------------------------------------------------------------------
# 1) Copy Number Segment files restricted to two explicit aliquot barcodes.
cnv.barcodes <- c(
  "TCGA-OR-A5KU-01A-11D-A29H-01",
  "TCGA-OR-A5JK-01A-11D-A29H-01"
)
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy Number Variation",
  data.type = "Copy Number Segment",
  barcode = cnv.barcodes
)
GDCdownload(query)
data <- GDCprepare(query)

# 2) Masked Copy Number Segment files, filtered by sample type instead of
#    by barcode. The matched cases can be inspected via
#    query$results[[1]]$cases
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy Number Variation",
  data.type = "Masked Copy Number Segment",
  sample.type = c("Primary solid Tumor")
)
GDCdownload(query)
data <- GDCprepare(query)
#-----------------------------------------------------------------------------
# Data.category: Transcriptome Profiling
#-----------------------------------------------------------------------------
# Gene expression: one query / download / prepare cycle per workflow type,
# always on the same two TARGET-AML barcodes.
workflow.type <- c("HTSeq - Counts", "HTSeq - FPKM", "HTSeq - FPKM-UQ")
expression.barcodes <- c("TARGET-20-PADZCG-04A-01R", "TARGET-20-PARJCR-09A-01R")
for (i in workflow.type) {
  print(i)
  query <- GDCquery(
    project = "TARGET-AML",
    data.category = "Transcriptome Profiling",
    data.type = "Gene Expression Quantification",
    workflow.type = i,
    barcode = expression.barcodes
  )
  GDCdownload(query)
  data <- GDCprepare(query)
}

# miRNA: same cycle per data type. Only the miRNA quantification is enabled;
# the variant that also includes isoform quantification is kept commented out.
#data.type <- c("miRNA Expression Quantification","Isoform Expression Quantification")
data.type <- c("miRNA Expression Quantification")
mirna.barcodes <- c("TARGET-20-PARUDL-03A-01R", "TARGET-20-PASRRB-03A-01R")
for (i in data.type) {
  print(i)
  query <- GDCquery(
    project = "TARGET-AML",
    data.category = "Transcriptome Profiling",
    data.type = i,
    workflow.type = "BCGSC miRNA Profiling",
    barcode = mirna.barcodes
  )
  GDCdownload(query)
  data <- GDCprepare(query)
  print(head(data))
}
####################### Working with Legacy data ###########################
# Data.category: Copy number variation
############################################################################
# The four TCGA-ACC queries below share the same two barcodes and differ only
# in the requested file.type; each result overwrites `z`.
acc.barcodes <- c(
  "TCGA-OR-A5LR-01A-11D-A29H-01",
  "TCGA-OR-A5LJ-10A-01D-A29K-01"
)

# file.type: nocnv_hg19.seg
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  legacy = TRUE,
  file.type = "nocnv_hg19.seg",
  barcode = acc.barcodes
)
GDCdownload(query)
z <- GDCprepare(query)

# file.type: nocnv_hg18.seg
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  legacy = TRUE,
  file.type = "nocnv_hg18.seg",
  barcode = acc.barcodes
)
GDCdownload(query)
z <- GDCprepare(query)

# file.type: hg19.seg
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  legacy = TRUE,
  file.type = "hg19.seg",
  barcode = acc.barcodes
)
GDCdownload(query)
z <- GDCprepare(query)

# TCGA-LGG, selected by platform rather than by file.type
query <- GDCquery(
  project = "TCGA-LGG",
  barcode = c("TCGA-HT-7476-10A-01D-2022-02", "TCGA-FG-6689-01A-11D-1891-02"),
  data.category = "Copy number variation",
  platform = "Illumina HiSeq",
  legacy = TRUE
)
GDCdownload(query)
z <- GDCprepare(query)

# file.type: hg18.seg
query <- GDCquery(
  project = "TCGA-ACC",
  data.category = "Copy number variation",
  legacy = TRUE,
  file.type = "hg18.seg",
  barcode = acc.barcodes
)
GDCdownload(query)
z <- GDCprepare(query)
####################### Working with Legacy data ########################### | |
# Data.category: DNA methylation & Protein expression | |
############################################################################ | |
# Run the legacy query -> download -> prepare pipeline on at most two cases,
# as a quick test for a given project / data category / platform.
#
# Args:
#   project:       project id passed to GDCquery (e.g. "TCGA-GBM").
#   data.category: data category passed to GDCquery (e.g. "DNA methylation").
#   platform:      platform passed to GDCquery
#                  (e.g. "Illumina Human Methylation 27").
#
# Returns:
#   Whatever GDCprepare() returns for the restricted query.
legacyPipeline <- function(project, data.category, platform){
  # First query is unrestricted: it is only used to discover available cases.
  query <- GDCquery(project = project,
                    data.category = data.category,
                    platform = platform,
                    legacy = TRUE)
  # head() rather than [1:2]: indexing past the end would pad the vector with
  # NA when fewer than two cases exist, and NA would be sent as a barcode.
  cases <- head(query$results[[1]]$cases, 2)
  if (length(cases) == 0) {
    stop("No cases found for project '", project,
         "', data.category '", data.category,
         "', platform '", platform, "'", call. = FALSE)
  }
  # Second query is restricted to the selected cases, keeping the download small.
  query <- GDCquery(project = project,
                    data.category = data.category,
                    platform = platform,
                    legacy = TRUE,
                    barcode = cases)
  GDCdownload(query)
  GDCprepare(query)
}
# DNA methylation: exercise legacyPipeline once per platform. The original
# list called "Illumina DNA Methylation OMA002 CPI" twice in a row; the
# duplicate only repeated the same download/prepare and has been dropped.
methylation.platforms <- c(
  "Illumina Human Methylation 27",
  "Illumina Human Methylation 450",
  "Illumina DNA Methylation OMA003 CPI",
  "Illumina DNA Methylation OMA002 CPI"
)
for (platform in methylation.platforms) {
  data <- legacyPipeline("TCGA-GBM", "DNA methylation", platform)
}

# Protein expression
data <- legacyPipeline("TCGA-GBM", "Protein expression", "MDA_RPPA_Core")
Hello. I need slide images for a TCGA project from GDC.
I can get them through the GDC portal by setting Data Type = Slide Image and Experimental Strategy = Diagnostic Slide,
but I need to get this data through the API from R.
I tried:
GDCquery(project = "TCGA-OV",data.category = "Biospecimen",
data.type = 'Slide Image')
but I get this:
|sort(harmonized.data.type) |
|:---------------------------------|
|Biospecimen Supplement |
|Clinical Supplement |
|Copy Number Segment |
|Gene Expression Quantification |
|Gene Level Copy Number Scores |
|Isoform Expression Quantification |
|Masked Copy Number Segment |
|Masked Somatic Mutation |
|miRNA Expression Quantification |
Error in checkDataTypeInput(legacy = legacy, data.type = data.type) :
Please set a data.type argument from the column harmonized.data.type above
Please help me.
I just updated the data.type check. You will need to update the package from GitHub.
You can update with:
withr::with_envvar(c(R_REMOTES_NO_ERRORS_FROM_WARNINGS="true"),
remotes::install_github('BioinformaticsFMRP/TCGAbiolinks')
)
The code below should work:
query <- GDCquery(project = "TCGA-OV",
data.category = "Biospecimen",
data.type = 'Slide Image')
GDCdownload(query,files.per.chunk = 2)
Thank you.
I updated the package, but it still doesn't work and I get the same error.
yes.
please check this page
http://www.bioconductor.org/packages/devel/bioc/vignettes/TCGAbiolinks/inst/doc/download_prepare.html
I found this sentence about 'Harmonized data':
"This function is still under development, it is not working for all cases."
Is that the reason the code above doesn't work?
Oh, I'm amazed.
I will try to find out why it isn't working.
Thank you so much.
Hello. I need slide images for a TCGA project from GDC.
I can get them through the GDC portal by setting Data Type = Slide Image and Experimental Strategy = Diagnostic Slide,
but I need to get this data through the API from R.
I tried:
GDCquery(project = "TCGA-OV",data.category = "Biospecimen",
data.type = 'Slide Image')
but I get this:
|sort(harmonized.data.type) |
|:---------------------------------|
|Biospecimen Supplement |
|Clinical Supplement |
|Copy Number Segment |
|Gene Expression Quantification |
|Gene Level Copy Number Scores |
|Isoform Expression Quantification |
|Masked Copy Number Segment |
|Masked Somatic Mutation |
|miRNA Expression Quantification |
Error in checkDataTypeInput(legacy = legacy, data.type = data.type) :
Please set a data.type argument from the column harmonized.data.type above
please help me.