Skip to content

Instantly share code, notes, and snippets.

@tiagochst
Last active September 23, 2020 18:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tiagochst/277651ebed998fd3d1952d3fbc376ef2 to your computer and use it in GitHub Desktop.
Save tiagochst/277651ebed998fd3d1952d3fbc376ef2 to your computer and use it in GitHub Desktop.
TCGAbiolinks vignette - case nb 2
# -----------------------------------------
# TCGAbiolinks vignette Case nb 2
# -----------------------------------------
library(TCGAbiolinks)
library(SummarizedExperiment)
# ----------- Parameters ------------------
# Tumor code, passed as `tumor` to the query functions below.
cancer <- "LGG"

# Platform name, passed as `platform` to the archive query.
PlatformCancer <- "IlluminaHiSeq_RNASeqV2"

# File type, passed as `type` to both the download and the prepare step.
dataType <- "rsem.genes.results"

# Local directory shared by the download and prepare steps. The tumor code
# is appended directly to "TCGA/data" with no path separator in between.
pathCancer <- paste0("TCGA/data", cancer)
# ------------------------------------------
# ----------- DATA --------------------
# Query the level "3" archives for the configured tumor and platform.
datQuery <- TCGAquery(
  tumor = cancer,
  platform = PlatformCancer,
  level = "3"
)

# Samples found in the query result; the result is indexed by platform name
# below.
lsSample <- TCGAquery_samplesfilter(query = datQuery)

# Keep only the "TP" sample type. The platform entry is looked up through
# PlatformCancer (exact name match via `[[`) instead of a hard-coded
# `$IlluminaHiSeq_RNASeqV2`, so changing the parameter above keeps this
# selection in sync with the query.
dataSmTP <- TCGAquery_SampleTypes(
  barcode = lsSample[[PlatformCancer]],
  typesample = "TP"
)

# get subtype information
dataSubt <- TCGAquery_subtype(tumor = cancer)

# get clinical data
dataClin <- TCGAquery_clinic(
  tumor = cancer,
  clinical_data_type = "clinical_patient"
)

# Download the selected samples' files of type `dataType` into `pathCancer`.
TCGAdownload(
  data = datQuery,
  path = pathCancer,
  type = dataType,
  samples = dataSmTP
)

# select option 6
# NOTE(review): presumably the answer to an interactive prompt raised by one
# of the surrounding calls -- confirm which call asks for it.

# Build the expression object from the downloaded files. The prepared object
# is also saved under `filename` (save = TRUE), and mutation and subtype
# annotations are requested.
dataAssy <- TCGAprepare(
  query = datQuery,
  dir = pathCancer,
  type = dataType,
  save = TRUE,
  summarizedExperiment = TRUE,
  samples = dataSmTP,
  add.mutation.genes = TRUE,
  add.subtype = TRUE,
  filename = paste0(cancer, "_", PlatformCancer, ".rda")
)
# Restrict the expression object to samples whose patient identifier also
# appears in the molecular subtype table.
dataAssy <- subset(
  dataAssy,
  select = colData(dataAssy)$patient %in% dataSubt$patient
)

# Preprocess with a correlation cut-off of 0.6, then normalize with the
# "gcContent" method using the `geneInfo` annotation object.
dataPrep <- TCGAanalyze_Preprocessing(object = dataAssy, cor.cut = 0.6)
dataNorm <- TCGAanalyze_Normalization(
  tabDF = dataPrep,
  geneInfo = geneInfo,
  method = "gcContent"
)

# Three filters applied in sequence; each one consumes the previous result.
datFilt1 <- TCGAanalyze_Filtering(tabDF = dataNorm, method = "varFilter")
datFilt2 <- TCGAanalyze_Filtering(tabDF = datFilt1, method = "filter1")
datFilt <- TCGAanalyze_Filtering(tabDF = datFilt2, method = "filter2")

# Label the rows of the filtered table with the patient identifiers.
# NOTE(review): this assumes datFilt has exactly one row per sample, in the
# same order as colData(dataAssy) -- confirm that preprocessing and filtering
# neither drop nor reorder samples.
rownames(datFilt) <- colData(dataAssy)$patient

# Cluster the filtered table twice with the "ward.D2" linkage: once with
# plain hierarchical clustering and once with consensus clustering.
data_Hc1 <- TCGAanalyze_Clustering(
  tabDF = datFilt,
  method = "hclust",
  methodHC = "ward.D2"
)
data_Hc2 <- TCGAanalyze_Clustering(
  tabDF = datFilt,
  method = "consensus",
  methodHC = "ward.D2"
)
# ------ Add cluster information
# Start from the consensus class stored in the 4th element of the consensus
# clustering result, then prefix every class label with "EC".
cluster <- data.frame(groupsHC = data_Hc2[[4]]$consensusClass)
cluster[["groupsHC"]] <- paste0("EC", cluster[["groupsHC"]])

# Patient key: the first 12 characters of each patient identifier.
# NOTE(review): assumes the consensus classes are in the same order as
# colData(dataAssy) -- confirm.
cluster[["patient"]] <- substr(colData(dataAssy)$patient, 1, 12)

# Add information about groups from the consensus cluster to the clinical
# data. This overwrites dataClin and keeps only patients present in both.
dataClin <- merge(
  dataClin, cluster,
  by.x = "bcr_patient_barcode",
  by.y = "patient"
)

# Merge subtype and clinical data: first keeping only patients found in both
# tables, then keeping every clinical row (all.x = TRUE).
clin_subt <- merge(
  dataClin, dataSubt,
  by.x = "bcr_patient_barcode",
  by.y = "patient"
)
clin_subt_all <- merge(
  dataClin, dataSubt,
  by.x = "bcr_patient_barcode",
  by.y = "patient",
  all.x = TRUE
)
# ----------- VISUALIZE --------------------
# Survival plot per expression cluster (EC1, EC2, EC3, EC4), written to
# "case2_surv.pdf". Uses the table that keeps every clinical row.
TCGAanalyze_survival(
  data = clin_subt_all,
  clusterCol = "groupsHC",
  main = "TCGA kaplan meier survival plot from consensus cluster",
  legend = "RNA Group",
  color = c("black", "red", "blue", "green3"),
  filename = "case2_surv.pdf"
)

# Profile plot: expression clusters against the IDH/codel subtype column.
TCGAvisualize_profilePlot(
  clin_subt,
  groupCol = "groupsHC",
  subtypeCol = "IDH.codel.subtype",
  filename = "case2_Idh.png",
  plot.margin = c(-4.0, -2.5, -0.5, 2),
  na.rm.groups = TRUE
)

# Profile plot: expression clusters against the supervised DNA methylation
# cluster column.
TCGAvisualize_profilePlot(
  clin_subt,
  groupCol = "groupsHC",
  subtypeCol = "Supervised.DNA.Methylation.Cluster",
  filename = "case2_Met.png",
  plot.margin = c(-4.0, -2.5, -0.5, 2),
  na.rm.groups = TRUE
)

# Heatmap of the transposed filtered table, with the columns annotated by
# cluster, histological type and IDH/codel subtype, and sorted by cluster.
TCGAvisualize_Heatmap(
  t(datFilt),
  col.metadata = clin_subt[, c(
    "bcr_patient_barcode",
    "groupsHC",
    "histological_type",
    "IDH.codel.subtype"
  )],
  col.colors = list(
    groupsHC = c(
      "EC1" = "black",
      "EC2" = "red",
      "EC3" = "blue",
      "EC4" = "green3"
    ),
    histological_type = c(
      "Astrocytoma" = "navy",
      "Oligoastrocytoma" = "green3",
      "Oligodendroglioma" = "red"
    ),
    IDH.codel.subtype = c(
      "IDHmut-codel" = "tomato",
      "IDHmut-non-codel" = "navy",
      "IDHwt" = "gold",
      "NA" = "white"
    )
  ),
  sortCol = "groupsHC",
  type = "expression", # picks the default color scheme
  scale = "row", # row z-scores, for better visualization
  title = "Heatmap from concensus cluster",
  cluster_rows = TRUE
)
# Mutation table (MAF) for LGG, taken from the named curated archive.
LGGmut <- TCGAquery_maf(
  tumor = "LGG",
  archive.name = "LGG.IlluminaGA_DNASeq_curated.Level_2.1.4.0"
)

# Selecting gene
mRNAsel <- "ATRX"
LGGselected <- LGGmut[LGGmut$Hugo_Symbol == mRNAsel, ]

# Keep the first record per tumor sample barcode, then shorten the barcode to
# its first 12 characters so it can be matched against cluster$patient.
dataMut <- LGGselected[!duplicated(LGGselected$Tumor_Sample_Barcode), ]
dataMut$Tumor_Sample_Barcode <- substr(dataMut$Tumor_Sample_Barcode, 1, 12)

# Adding the Expression Cluster classification found before
dataMut <- merge(
  dataMut, cluster,
  by.x = "Tumor_Sample_Barcode",
  by.y = "patient"
)

# Drop rows whose variant classification equals 0. which() discards NA
# comparisons; a bare logical index would turn each NA into an all-NA row.
dataMut <- dataMut[which(dataMut$Variant_Classification != 0), ]

# Profile plots of the selected gene's mutations per expression cluster,
# first by variant classification and then by variant type.
TCGAvisualize_profilePlot(
  dataMut,
  groupCol = "groupsHC",
  subtypeCol = "Variant_Classification",
  na.rm.groups = FALSE,
  plot.margin = c(-4.0, -2.5, -0.5, 2),
  filename = paste0(mRNAsel, "_Mutation_Variant_Classification.png")
)
TCGAvisualize_profilePlot(
  dataMut,
  groupCol = "groupsHC",
  subtypeCol = "Variant_Type",
  na.rm.groups = FALSE,
  plot.margin = c(-4.0, -2.5, -0.5, 2),
  filename = paste0(mRNAsel, "_Mutation_Variant_Type_in_EC_clusters.png")
)

# Mutation plots for ATRX and IDH1, built from the sample annotation of the
# expression object and grouped by the supervised DNA methylation cluster.
# NOTE(review): the file names say "RNASeqCluster" while the grouping column
# is the DNA methylation cluster -- confirm which one is intended.
TCGAvisualize_mutation(
  data = colData(dataAssy),
  groupCol = "Supervised.DNA.Methylation.Cluster",
  geneList = "ATRX",
  filename = "mutation_ATRX_RNASeqCluster.png",
  by = "cluster"
)
TCGAvisualize_mutation(
  data = colData(dataAssy),
  groupCol = "Supervised.DNA.Methylation.Cluster",
  geneList = "IDH1",
  filename = "mutation_IDH1_RNASeqCluster.png",
  by = "cluster"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment