Skip to content

Instantly share code, notes, and snippets.

@wenjie1991
Last active May 9, 2022 09:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wenjie1991/d79fe428ac80c8f2e5d781a966df3978 to your computer and use it in GitHub Desktop.
Save wenjie1991/d79fe428ac80c8f2e5d781a966df3978 to your computer and use it in GitHub Desktop.
Pathway Enrichment

This script is used for pathway enrichment analysis of paper: Paracrine signalling between intestinal epithelial and tumour cells induces a regenerative programme eLife2022;11:e76541 DOI: https://doi.org/10.7554/eLife.76541

Licence

pathway_enrichment.R - Pathway enrichment analysis Written in 2019 by Wenjie SUN sunwjie@gmail.com

To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see http://creativecommons.org/publicdomain/zero/1.0/.

library(stringr)
library(magrittr)
library(msigdbr)
library(clusterProfiler)
library(org.Mm.eg.db)
symbol2entrez = function(x) {
ensembl = str_split_fixed(x, "\\|", 2) %>% extract(, 1) %>% str_split_fixed("\\.", 2) %>% extract(, 1)
select(org.Mm.eg.db, ensembl, "ENTREZID", "SYMBOL")[["ENTREZID"]] %>% na.omit
}
gene_set_enrichment = function(query, species = "mouse", background = NULL) {
# TODO: Human datasets
species_dict = c("mouse" = "Mus musculus", "human" = "Homo ...")
species_official_name = species_dict[species]
m_df = msigdbr(species = species_official_name, category = "H")
m_HGENE = m_df %>% dplyr::select(gs_id, entrez_gene) %>% as.data.frame()
m_HNAME = m_df %>% dplyr::select(gs_id, gs_name) %>% as.data.frame()
# m_df = msigdbr(species = species_official_name, category = "C2")
# m_c2GENE = m_df %>% dplyr::select(gs_id, entrez_gene) %>% as.data.frame()
# m_c2NAME = m_df %>% dplyr::select(gs_id, gs_name) %>% as.data.frame()
# m_df = msigdbr(species = species_official_name, category = "C4")
# m_c4GENE = m_df %>% dplyr::select(gs_name, entrez_gene) %>% as.data.frame()
# m_c4NAME = m_df %>% dplyr::select(gs_id, gs_name) %>% as.data.frame()
enrichemnt = list()
# TODO: auto transform the gene name type
entrezid = query
if (is.null(background)) {
enrichemnt$KEGG = enrichKEGG(entrezid, organism = 'mmu', pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, universe = background)
# enrichemnt$C2 = enricher(entrezid, universe = background, pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE = m_c2GENE, TERM2NAME = m_c2NAME)
enrichemnt$HALLMARK = enricher(entrezid, universe = background, pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE = m_HGENE, TERM2NAME = m_HNAME)
# enrichemnt$C4 = enricher(entrezid, universe = background, pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE = m_c4GENE, TERM2NAME = m_c4NAME)
enrichemnt = lapply(enrichemnt, function(x) {setReadable(x, OrgDb = org.Mm.eg.db, keyType="ENTREZID") %>% data.frame}) %>% ldply(.id = "DataBase") %>% data.table
}
enrichemnt
}
plot_enrichment_result = function(x) {
g = ggplot(x) + aes(x = -log10(pvalue), y = Description, color = Count) + geom_point()
g + theme_bw() + scale_color_continuous()
}
## EXAMPLE:
# d = fread("../data/As_result/WATER_VS_DSS_AS_diffpsi0.1.tsv")
# up_symbol_v = d[change_type == "up", symbol] %>% symbol2entrez
# down_symbol_v = d[change_type == "down", symbol] %>% symbol2entrez
# both_v = d$symbol %>% symbol2entrez
# dir.create("../data/GeneSet_result")
# x = gene_set_enrichment(up_symbol_v)[pvalue <= 0.1]
# g1 =plot_enrichment_result(x) + labs(title = "UP genes")
# write_tsv(x, "../data/GeneSet_result/Water_VS_DSS_UP_pathway.tsv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment