Skip to content

Instantly share code, notes, and snippets.

@skurscheid
Created April 29, 2020 04:36
Show Gist options
  • Save skurscheid/9c489ed67e767a6c3f6aff6f0c7ea0ab to your computer and use it in GitHub Desktop.
Save skurscheid/9c489ed67e767a6c3f6aff6f0c7ea0ab to your computer and use it in GitHub Desktop.
Cluster profiler processing of DESeq2 results -
library(clusterProfiler)
library(magrittr)
library(tidyverse)
library(qusage)
library(msigdbr)
library(biomaRt)
library(DOSE)
library(org.Mm.eg.db)
library(enrichplot)
library(data.table)
#reading in data
## assume that 1st column is ID
## 2nd column is fold change
data1 <- read.csv("wt_mal_ercc_results.csv", header = TRUE)
setDT(data1) # just for better readibility when printing
#removing duplicates (required for GSEA analysis)
data1_new = data1 %>% distinct(X, .keep_all = TRUE)
setDT(data1_new)
# remove all the rows which contain no data
data1_new <- data1_new[!is.na(data1_new$log2FoldChange)]
#entrez gene id
# not really needed here as you can use Ensembl ID for mapping to GO terms
mart <- useDataset("mmusculus_gene_ensembl", useMart("ensembl"))
bm <- getBM(attributes= c("ensembl_gene_id", "entrezgene_id"), mart= mart)
d1_new <- inner_join(data1_new, bm, by=c("X"="ensembl_gene_id"))
## feature 1: numeric vector
geneList1 <- d1_new[,3]
## feature 2: named vector
# I have changed this to use the Ensembl ID instead as I noticed lots of missing Entrez IDs
names(geneList1) <- as.character(d1_new[,1])
## feature 3: decreasing order
geneList1 <- sort(geneList1, decreasing = TRUE)
#define fold change greater than 2 as DEG
gene1 <- names(geneList1)[abs(geneList1) > 2]
# if you d
gene1 <- gene1[!is.na(gene1)]
#over-representation analysis
ego_bp1 <- enrichGO(gene = gene1,
universe = names(geneList1),
OrgDb = org.Mm.eg.db,
keyType = 'ENSEMBL',
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
head(ego_bp1)
#GSEA analysis
ego_gse1 <- gseGO(geneList = geneList1,
OrgDb = org.Mm.eg.db,
ont = "BP", # changed this for testing purposes
keyType = "ENSEMBL",
nPerm = 1000,
minGSSize = 100,
maxGSSize = 500,
pvalueCutoff = 0.1,
verbose = FALSE)
head(ego_gse1, 10)
# inspect the whole range of qvalues
ego_gse1@result$qvalues
# there are indeed tests that do have a higher qvalue
# I recall that there is an issue with BH adjustment, as it is not continous (Maurits might be better informed!)
# which could lead to the effects we are seeing here
dotplot(ego_gse1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment