Created
April 29, 2020 04:36
-
-
Save skurscheid/9c489ed67e767a6c3f6aff6f0c7ea0ab to your computer and use it in GitHub Desktop.
Cluster profiler processing of DESeq2 results -
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(clusterProfiler) | |
library(magrittr) | |
library(tidyverse) | |
library(qusage) | |
library(msigdbr) | |
library(biomaRt) | |
library(DOSE) | |
library(org.Mm.eg.db) | |
library(enrichplot) | |
library(data.table) | |
#reading in data | |
## assume that 1st column is ID | |
## 2nd column is fold change | |
data1 <- read.csv("wt_mal_ercc_results.csv", header = TRUE) | |
setDT(data1) # just for better readibility when printing | |
#removing duplicates (required for GSEA analysis) | |
data1_new = data1 %>% distinct(X, .keep_all = TRUE) | |
setDT(data1_new) | |
# remove all the rows which contain no data | |
data1_new <- data1_new[!is.na(data1_new$log2FoldChange)] | |
#entrez gene id | |
# not really needed here as you can use Ensembl ID for mapping to GO terms | |
mart <- useDataset("mmusculus_gene_ensembl", useMart("ensembl")) | |
bm <- getBM(attributes= c("ensembl_gene_id", "entrezgene_id"), mart= mart) | |
d1_new <- inner_join(data1_new, bm, by=c("X"="ensembl_gene_id")) | |
## feature 1: numeric vector | |
geneList1 <- d1_new[,3] | |
## feature 2: named vector | |
# I have changed this to use the Ensembl ID instead as I noticed lots of missing Entrez IDs | |
names(geneList1) <- as.character(d1_new[,1]) | |
## feature 3: decreasing order | |
geneList1 <- sort(geneList1, decreasing = TRUE) | |
#define fold change greater than 2 as DEG | |
gene1 <- names(geneList1)[abs(geneList1) > 2] | |
# if you d | |
gene1 <- gene1[!is.na(gene1)] | |
#over-representation analysis | |
ego_bp1 <- enrichGO(gene = gene1, | |
universe = names(geneList1), | |
OrgDb = org.Mm.eg.db, | |
keyType = 'ENSEMBL', | |
ont = "ALL", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.01, | |
qvalueCutoff = 0.05, | |
readable = TRUE) | |
head(ego_bp1) | |
#GSEA analysis | |
ego_gse1 <- gseGO(geneList = geneList1, | |
OrgDb = org.Mm.eg.db, | |
ont = "BP", # changed this for testing purposes | |
keyType = "ENSEMBL", | |
nPerm = 1000, | |
minGSSize = 100, | |
maxGSSize = 500, | |
pvalueCutoff = 0.1, | |
verbose = FALSE) | |
head(ego_gse1, 10) | |
# inspect the whole range of qvalues | |
ego_gse1@result$qvalues | |
# there are indeed tests that do have a higher qvalue | |
# I recall that there is an issue with BH adjustment, as it is not continous (Maurits might be better informed!) | |
# which could lead to the effects we are seeing here | |
dotplot(ego_gse1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment