-
-
Save tiagochst/277651ebed998fd3d1952d3fbc376ef2 to your computer and use it in GitHub Desktop.
TCGAbiolinks vignette - case nb 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -----------------------------------------
# TCGAbiolinks vignette: case study no. 2
# -----------------------------------------
# Packages attached for the whole script. TCGAbiolinks is attached for the
# TCGAquery*/TCGAdownload/TCGAprepare/TCGAanalyze_*/TCGAvisualize_* calls,
# SummarizedExperiment for the colData() accessor used on the prepared object.
library(TCGAbiolinks)
library(SummarizedExperiment)
#----------- Parameters --------------------
# Tumour code, expression platform and file type reused by the query,
# download and prepare steps of this script.
cancer         <- "LGG"
PlatformCancer <- "IlluminaHiSeq_RNASeqV2"
dataType       <- "rsem.genes.results"
# Working directory for downloaded files. The tumour code is appended
# directly to "data" (no path separator), i.e. "TCGA/dataLGG" here.
pathCancer     <- paste0("TCGA/data", cancer)
#------------------------------------------
#----------- DATA --------------------
# Query level-3 archives for the configured tumour type and platform.
datQuery <- TCGAquery(tumor = cancer,
                      platform = PlatformCancer,
                      level = "3")

# Sample barcodes available in the query result.
lsSample <- TCGAquery_samplesfilter(query = datQuery)

# Keep only "TP" samples. The element is looked up through the PlatformCancer
# parameter with [[ ]] (exact matching) instead of a hard-coded `$` name, so
# the script stays consistent if the platform parameter is changed.
dataSmTP <- TCGAquery_SampleTypes(barcode = lsSample[[PlatformCancer]],
                                  typesample = "TP")

# get subtype information
dataSubt <- TCGAquery_subtype(tumor = cancer)

# get clinical data
dataClin <- TCGAquery_clinic(tumor = cancer,
                             clinical_data_type = "clinical_patient")

# Download the selected file type for the TP samples into pathCancer.
TCGAdownload(data = datQuery,
             path = pathCancer,
             type = dataType,
             samples = dataSmTP)
# select option 6
# NOTE(review): the line above presumably refers to an interactive prompt
# shown while this step runs -- confirm against the TCGAbiolinks version used.
#
# Read the downloaded files for the TP samples into one object. Because
# summarizedExperiment = TRUE, later steps access its sample annotation
# through colData(). save = TRUE together with `filename` requests an .rda
# copy named "<cancer>_<platform>.rda".
dataAssy <- TCGAprepare(
  query = datQuery,
  dir = pathCancer,
  type = dataType,
  samples = dataSmTP,
  summarizedExperiment = TRUE,
  add.subtype = TRUE,
  add.mutation.genes = TRUE,
  save = TRUE,
  filename = paste0(cancer, "_", PlatformCancer, ".rda")
)
# Restrict the expression object to patients that have a molecular subtype
# entry in dataSubt.
dataAssy <- subset(
  dataAssy,
  select = colData(dataAssy)$patient %in% dataSubt$patient
)

# Preprocessing with a correlation cut-off of 0.6.
dataPrep <- TCGAanalyze_Preprocessing(object = dataAssy, cor.cut = 0.6)

# GC-content normalisation; `geneInfo` is not defined in this script and is
# presumably the annotation table shipped with TCGAbiolinks -- confirm.
dataNorm <- TCGAanalyze_Normalization(
  tabDF = dataPrep,
  geneInfo = geneInfo,
  method = "gcContent"
)

# Three successive filters, each applied to the output of the previous one.
datFilt1 <- TCGAanalyze_Filtering(tabDF = dataNorm, method = "varFilter")
datFilt2 <- TCGAanalyze_Filtering(tabDF = datFilt1, method = "filter1")
datFilt  <- TCGAanalyze_Filtering(tabDF = datFilt2, method = "filter2")
# Label the rows of the filtered matrix with patient identifiers.
# NOTE(review): assumes the rows of datFilt are samples, in the same order as
# colData(dataAssy) -- confirm.
rownames(datFilt) <- colData(dataAssy)$patient

# Hierarchical clustering (Ward D2 linkage).
data_Hc1 <- TCGAanalyze_Clustering(
  tabDF = datFilt,
  method = "hclust",
  methodHC = "ward.D2"
)

# Consensus clustering with the same linkage.
data_Hc2 <- TCGAanalyze_Clustering(
  tabDF = datFilt,
  method = "consensus",
  methodHC = "ward.D2"
)

#------ Add cluster information
# Element 4 of the consensus result is used; presumably this is the k = 4
# solution (four EC groups are coloured in the plots below) -- confirm.
cluster <- data.frame("groupsHC" = data_Hc2[[4]]$consensusClass)
# Prefix the class labels with "EC".
cluster[["groupsHC"]] <- paste0("EC", cluster[["groupsHC"]])
# Patient key: the first 12 characters of the patient identifier.
cluster[["patient"]] <- substr(colData(dataAssy)$patient, 1, 12)
# Add information about groups from the consensus cluster to the clinical
# data. Note that dataClin is overwritten with the merged table.
dataClin <- merge(dataClin, cluster,
                  by.x = "bcr_patient_barcode",
                  by.y = "patient")

# Merge subtype and clinical data.
# clin_subt_all keeps every clinical row (all.x = TRUE), including patients
# that have no subtype entry.
clin_subt_all <- merge(dataClin, dataSubt,
                       by.x = "bcr_patient_barcode",
                       by.y = "patient",
                       all.x = TRUE)
# clin_subt keeps only patients present in both tables.
clin_subt <- merge(dataClin, dataSubt,
                   by.x = "bcr_patient_barcode",
                   by.y = "patient")
#----------- VISUALIZE --------------------
# Kaplan-Meier survival plot for groups EC1, EC2, EC3, EC4, one colour per
# group, written to case2_surv.pdf.
TCGAanalyze_survival(
  data = clin_subt_all,
  clusterCol = "groupsHC",
  legend = "RNA Group",
  color = c("black", "red", "blue", "green3"),
  main = "TCGA kaplan meier survival plot from consensus cluster",
  filename = "case2_surv.pdf"
)
# Profile plot: expression clusters versus IDH/codeletion subtype.
TCGAvisualize_profilePlot(
  clin_subt,
  groupCol = "groupsHC",
  subtypeCol = "IDH.codel.subtype",
  na.rm.groups = TRUE,
  plot.margin = c(-4.0, -2.5, -0.5, 2),
  filename = "case2_Idh.png"
)

# Profile plot: expression clusters versus supervised DNA methylation cluster.
TCGAvisualize_profilePlot(
  clin_subt,
  groupCol = "groupsHC",
  subtypeCol = "Supervised.DNA.Methylation.Cluster",
  na.rm.groups = TRUE,
  plot.margin = c(-4.0, -2.5, -0.5, 2),
  filename = "case2_Met.png"
)
# Expression heatmap of the filtered matrix. datFilt is transposed before
# plotting, and the columns are annotated with expression cluster,
# histological type and IDH/codeletion subtype taken from clin_subt.
# The plot title previously misspelled "consensus" ("concensus"); corrected.
TCGAvisualize_Heatmap(
  t(datFilt),
  col.metadata = clin_subt[, c("bcr_patient_barcode",
                               "groupsHC",
                               "histological_type",
                               "IDH.codel.subtype")],
  # One named colour vector per annotation column.
  col.colors = list(
    groupsHC = c("EC1" = "black",
                 "EC2" = "red",
                 "EC3" = "blue",
                 "EC4" = "green3"),
    histological_type = c("Astrocytoma" = "navy",
                          "Oligoastrocytoma" = "green3",
                          "Oligodendroglioma" = "red"),
    IDH.codel.subtype = c("IDHmut-codel" = "tomato",
                          "IDHmut-non-codel" = "navy",
                          "IDHwt" = "gold",
                          "NA" = "white")
  ),
  sortCol = "groupsHC",
  type = "expression", # sets default color
  scale = "row", # use z-scores for better visualization
  title = "Heatmap from consensus cluster",
  cluster_rows = TRUE
)
# Mutation (MAF) table for the configured tumour type. `tumor` now uses the
# `cancer` parameter instead of a second hard-coded "LGG".
# NOTE: archive.name is still specific to LGG; update it if `cancer` changes.
LGGmut <- TCGAquery_maf(
  tumor = cancer,
  archive.name = "LGG.IlluminaGA_DNASeq_curated.Level_2.1.4.0"
)

# Selecting gene
mRNAsel <- "ATRX"
# %in% never returns NA, so records with a missing Hugo_Symbol are dropped
# rather than turned into all-NA rows (which `==` inside `[` would produce).
LGGselected <- LGGmut[LGGmut$Hugo_Symbol %in% mRNAsel, ]

# Keep the first record per tumour sample barcode, then shorten the barcode
# to its first 12 characters so it can be matched to cluster$patient.
# NOTE(review): de-duplication happens before truncation, so two samples of
# the same patient would both be kept -- confirm this is intended.
dataMut <- LGGselected[!duplicated(LGGselected$Tumor_Sample_Barcode), ]
dataMut$Tumor_Sample_Barcode <- substr(dataMut$Tumor_Sample_Barcode, 1, 12)

# Adding the Expression Cluster classification found before
dataMut <- merge(dataMut, cluster,
                 by.x = "Tumor_Sample_Barcode",
                 by.y = "patient")

# Drop records whose Variant_Classification is 0; the explicit is.na() guard
# keeps missing values from producing all-NA rows in the result.
dataMut <- dataMut[!is.na(dataMut$Variant_Classification) &
                     dataMut$Variant_Classification != 0, ]
# Profile plot: expression clusters versus variant classification of the
# selected gene; the output file name is prefixed with the gene symbol.
TCGAvisualize_profilePlot(
  dataMut,
  groupCol = "groupsHC",
  subtypeCol = "Variant_Classification",
  filename = paste0(mRNAsel, "_Mutation_Variant_Classification.png"),
  plot.margin = c(-4.0, -2.5, -0.5, 2),
  na.rm.groups = FALSE
)

# Same plot, by variant type.
TCGAvisualize_profilePlot(
  dataMut,
  groupCol = "groupsHC",
  subtypeCol = "Variant_Type",
  filename = paste0(mRNAsel, "_Mutation_Variant_Type_in_EC_clusters.png"),
  plot.margin = c(-4.0, -2.5, -0.5, 2),
  na.rm.groups = FALSE
)
# Mutation plots built from the sample annotation of the prepared object,
# grouped by supervised DNA methylation cluster, one plot per gene.
# NOTE(review): the output file names say "RNASeqCluster" although groupCol is
# the DNA methylation cluster -- confirm which grouping was intended.
TCGAvisualize_mutation(
  data = colData(dataAssy),
  geneList = "ATRX",
  groupCol = "Supervised.DNA.Methylation.Cluster",
  by = "cluster",
  filename = "mutation_ATRX_RNASeqCluster.png"
)

TCGAvisualize_mutation(
  data = colData(dataAssy),
  geneList = "IDH1",
  groupCol = "Supervised.DNA.Methylation.Cluster",
  by = "cluster",
  filename = "mutation_IDH1_RNASeqCluster.png"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment