|
library(stringr) |
|
library(magrittr) |
|
library(msigdbr) |
|
library(clusterProfiler) |
|
library(org.Mm.eg.db) |
|
|
|
symbol2entrez = function(x) { |
|
ensembl = str_split_fixed(x, "\\|", 2) %>% extract(, 1) %>% str_split_fixed("\\.", 2) %>% extract(, 1) |
|
select(org.Mm.eg.db, ensembl, "ENTREZID", "SYMBOL")[["ENTREZID"]] %>% na.omit |
|
} |
|
|
|
gene_set_enrichment = function(query, species = "mouse", background = NULL) { |
|
|
|
# TODO: Human datasets |
|
species_dict = c("mouse" = "Mus musculus", "human" = "Homo ...") |
|
species_official_name = species_dict[species] |
|
|
|
m_df = msigdbr(species = species_official_name, category = "H") |
|
m_HGENE = m_df %>% dplyr::select(gs_id, entrez_gene) %>% as.data.frame() |
|
m_HNAME = m_df %>% dplyr::select(gs_id, gs_name) %>% as.data.frame() |
|
# m_df = msigdbr(species = species_official_name, category = "C2") |
|
# m_c2GENE = m_df %>% dplyr::select(gs_id, entrez_gene) %>% as.data.frame() |
|
# m_c2NAME = m_df %>% dplyr::select(gs_id, gs_name) %>% as.data.frame() |
|
# m_df = msigdbr(species = species_official_name, category = "C4") |
|
# m_c4GENE = m_df %>% dplyr::select(gs_name, entrez_gene) %>% as.data.frame() |
|
# m_c4NAME = m_df %>% dplyr::select(gs_id, gs_name) %>% as.data.frame() |
|
|
|
enrichemnt = list() |
|
# TODO: auto transform the gene name type |
|
entrezid = query |
|
|
|
if (is.null(background)) { |
|
enrichemnt$KEGG = enrichKEGG(entrezid, organism = 'mmu', pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, universe = background) |
|
# enrichemnt$C2 = enricher(entrezid, universe = background, pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE = m_c2GENE, TERM2NAME = m_c2NAME) |
|
enrichemnt$HALLMARK = enricher(entrezid, universe = background, pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE = m_HGENE, TERM2NAME = m_HNAME) |
|
# enrichemnt$C4 = enricher(entrezid, universe = background, pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE = m_c4GENE, TERM2NAME = m_c4NAME) |
|
enrichemnt = lapply(enrichemnt, function(x) {setReadable(x, OrgDb = org.Mm.eg.db, keyType="ENTREZID") %>% data.frame}) %>% ldply(.id = "DataBase") %>% data.table |
|
} |
|
enrichemnt |
|
} |
|
|
|
plot_enrichment_result = function(x) { |
|
g = ggplot(x) + aes(x = -log10(pvalue), y = Description, color = Count) + geom_point() |
|
g + theme_bw() + scale_color_continuous() |
|
} |
|
|
|
|
|
## EXAMPLE: |
|
# d = fread("../data/As_result/WATER_VS_DSS_AS_diffpsi0.1.tsv") |
|
# up_symbol_v = d[change_type == "up", symbol] %>% symbol2entrez |
|
# down_symbol_v = d[change_type == "down", symbol] %>% symbol2entrez |
|
# both_v = d$symbol %>% symbol2entrez |
|
|
|
# dir.create("../data/GeneSet_result") |
|
# x = gene_set_enrichment(up_symbol_v)[pvalue <= 0.1] |
|
# g1 =plot_enrichment_result(x) + labs(title = "UP genes") |
|
# write_tsv(x, "../data/GeneSet_result/Water_VS_DSS_UP_pathway.tsv") |