Skip to content

Instantly share code, notes, and snippets.

@tiagochst
Last active January 20, 2018 07:52
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tiagochst/e324bead84c4accef753f51bc7e4dbd1 to your computer and use it in GitHub Desktop.
Save tiagochst/e324bead84c4accef753f51bc7e4dbd1 to your computer and use it in GitHub Desktop.
# ---------------------------
# Accessing the material
# https://tinyurl.com/bioc2017-ELMER
# ---------------------------
library("Bioc2017.TCGAbiolinks.ELMER")
Biobase::openVignette("Bioc2017.TCGAbiolinks.ELMER")
# ---------------------------
# Section 1:
# Aims:
# 1.1 Accessing GDC
# 1.2 Downloading samples
# 1.3 Preparing files into a SummarizedExperiment object
# ---------------------------
# Auxiliary information:
# - GDC: The NCI's Genomic Data Commons (GDC)
# https://portal.gdc.cancer.gov/
# - Pipelines description
# https://docs.gdc.cancer.gov/Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline/
# - TCGA Barcode description:
# https://wiki.nci.nih.gov/display/TCGA/TCGA+barcode
# https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/sample-type-codes
# ---------------------------
library(TCGAbiolinks)
library(SummarizedExperiment)
library(DT)
library(dplyr)
query.exp <- GDCquery(project = "TCGA-LUSC",
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
workflow.type = "HTSeq - FPKM-UQ",
barcode = c("TCGA-34-5231-01","TCGA-77-7138-01"))
GDCdownload(query.exp)
exp <- GDCprepare(query = query.exp,
save = TRUE,
save.filename = "Exp_LUSC.rda",
summarizedExperiment = TRUE)
# Summarized experiment
# http://bioconductor.org/packages/release/bioc/vignettes/SummarizedExperiment/inst/doc/SummarizedExperiment.html
query.met <- GDCquery(project = "TCGA-LUSC",
data.category = "DNA Methylation",
platform = "Illumina Human Methylation 450",
barcode = c("TCGA-34-5231-01A-21D-1818-05","TCGA-77-7138-01A-41D-2043-05"))
GDCdownload(query.met)
met <- GDCprepare(query = query.met,
save = TRUE,
save.filename = "DNAmethylation_LUSC.rda",
summarizedExperiment = TRUE)
# ---------------------------
# Section 2:
# Aims:
# 2.1 Create a MultiAssayExperiment
# 2.2 Find Differently methylated probes
# (Normal tissue sample vs Primary Solid Tumor)
# 2.3 Find inverse correlated probes-genes pairs
# (probes hypomethylated and gene upregulated)
# 2.4 Motif enrichment analysis on the regions with a loss of
# DNA methylation
# 2.5 Find cadidate regulatory TF
# ---------------------------
library("Bioc2017.TCGAbiolinks.ELMER")
library(MultiAssayExperiment)
library(ELMER)
lusc.exp
lusc.met
distal.probes <- get.feature.probe(genome = "hg38", met.platform = "450K")
mae <- createMAE(exp = lusc.exp,
met = lusc.met,
save = TRUE,
linearize.exp = TRUE,
filter.probes = distal.probes,
save.filename = "mae_bioc2017.rda",
met.platform = "450K",
genome = "hg38",
TCGA = TRUE)
mae
colData(mae)[1:5,] %>% as.data.frame %>% datatable(options = list(scrollX = TRUE))
sampleMap(mae)[1:5,] %>% as.data.frame %>% datatable(options = list(scrollX = TRUE))
group.col <- "definition"
group1 <- "Primary solid Tumor"
group2 <- "Solid Tissue Normal"
dir.out <- "result"
Sig.probes <- get.diff.meth(data = mae,
group.col = group.col,
group1 = group1,
group2 = group2,
minSubgroupFrac = 0.2,
sig.dif = 0.3,
diff.dir = "hypo", # Search for hypomethylated probes in group 1
cores = 4,
dir.out = dir.out,
pvalue = 0.01)
nearGenes <- GetNearGenes(data = mae,
probes = Sig.probes$probe,
numFlankingGenes = 20, # 10 upstream and 10 dowstream genes
cores = 4)
pair <- get.pair(data = mae,
group.col = group.col,
group1 = group1,
group2 = group2,
nearGenes = nearGenes,
minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M
permu.dir = "result/permu",
permu.size = 100, # Please set to 100000 to get significant results
pvalue = 0.05,
Pe = 0.01, # Please set to 0.001 to get significant results
filter.probes = TRUE, # See preAssociationProbeFiltering function
filter.percentage = 0.05,
filter.portion = 0.3,
dir.out = "result",
cores = 4,
label = "hypo")
# 2.4 Motif enrichment analysis on the regions with a loss of
# DNA methylation
# Hocomoco: http://hocomoco.autosome.ru/
library(ELMER.data)
data("Probes.motif.hg19.450K")
as.matrix(Probes.motif.hg19.450K[1:3,1:3])
enriched.motif <- get.enriched.motif(data = mae,
probes.motif = Probes.motif.hg19.450K,
probes = pair$Probe,
dir.out = dir.out,
label = "hypo",
min.incidence = 10,
lower.OR = 1.1)
motif.enrichment <- read.csv(paste0(dir.out,"/getMotif.hypo.motif.enrichment.csv"),
stringsAsFactors=FALSE)
names(enriched.motif) # enriched motifs
enriched.motif[[1]]
# TF classification: http://tfclass.bioinf.med.uni-goettingen.de/tfclass
downloader::download("https://github.com/tiagochst/ELMER.data/raw/master/data/TF.family.rda","TF.family.rda")
load("TF.family.rda")
TF.family$AHR_HUMAN.H10MO.B
downloader::download("https://github.com/tiagochst/ELMER.data/raw/master/data/TF.subfamily.rda","TF.subfamily.rda")
load("TF.subfamily.rda")
## identify regulatory TF for the enriched motifs
TF <- get.TFs(data = mae,
motif.relevant.TFs = TF.family,
group.col = group.col,
group1 = group1,
group2 = group2,
minSubgroupFrac = 0.4,
enriched.motif = enriched.motif,
dir.out = dir.out,
cores = 4,
label = "hypo")
TF.meth.cor <- get(load(paste0(dir.out,"/getTF.hypo.TFs.with.motif.pvalue.rda")))
save(mae,
Sig.probes,
pair, nearGenes,
motif.enrichment, enriched.motif,
TF.meth.cor, TF,
group.col, group1, group2,
file = paste0(dir.out,"/ELMER_results_hypo.rda"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment