Skip to content

Instantly share code, notes, and snippets.

@ATpoint
Last active June 26, 2023 11:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ATpoint/c6d8a4e0d7665bca538d35a50daa4331 to your computer and use it in GitHub Desktop.
Save ATpoint/c6d8a4e0d7665bca538d35a50daa4331 to your computer and use it in GitHub Desktop.
Query biomaRt and the KEGG API to produce a table connecting KEGG pathways with gene names for mouse and human
library(biomaRt)
library(data.table)
library(dplyr)
library(magrittr)
# Build, for mouse and human, a table linking KEGG pathways to genes.
#
# Result: `kegg` is a named list with the elements `mouse` and `human`. Each
# element is a data.frame with the columns
#   pathway_id    KEGG pathway id without the "path:" prefix
#   pathway_name  pathway title with the trailing " - <organism>" removed
#   entrezgene_id Entrez gene id (character), the key shared by KEGG and Ensembl
#   gene_id       Ensembl gene id
#   gene_name     gene symbol (mgi_symbol for mouse, hgnc_symbol for human)
#
# Needs network access to Ensembl (pinned to release 101) and rest.kegg.jp.
kegg <- sapply(c("mouse", "human"), function(x) {
  # Per-species settings: Ensembl dataset, KEGG organism code and the biomaRt
  # attribute that holds the gene symbol. Unknown species fail loudly instead
  # of leaving these settings undefined.
  species <- switch(
    x,
    mouse = list(
      dataset = "mmusculus_gene_ensembl",
      u = "mmu",
      gene_name = "mgi_symbol"
    ),
    human = list(
      dataset = "hsapiens_gene_ensembl",
      u = "hsa",
      gene_name = "hgnc_symbol"
    ),
    stop("Unsupported species: ", x, call. = FALSE)
  )
  dataset <- species$dataset
  u <- species$u
  gene_name <- species$gene_name

  # Query biomaRt for a table connecting Ensembl gene id, gene symbol and
  # Entrez gene id.
  mart <- biomaRt::useEnsembl("ensembl", dataset = dataset, version = 101)
  genes <-
    biomaRt::getBM(
      attributes = c("ensembl_gene_id", gene_name, "entrezgene_id"),
      mart = mart
    ) %>%
    dplyr::mutate(entrezgene_id = as.character(entrezgene_id)) %>%
    magrittr::set_colnames(c("gene_id", "gene_name", "entrezgene_id")) %>%
    # Genes without an Entrez id can never be linked to KEGG. Dropping them
    # here matters because dplyr joins treat NA keys as equal by default: a
    # pathway row with a missing Entrez id would otherwise be paired with
    # every one of these genes.
    dplyr::filter(!is.na(entrezgene_id))

  # Query KEGG API for a table connecting pathways and Entrez gene id.
  # The response is tab-separated without a header; state that explicitly
  # rather than relying on separator detection.
  kegg_pathway2entrez <-
    data.table::fread(
      paste0("https://rest.kegg.jp/link/", u, "/pathway"),
      sep = "\t", quote = "", header = FALSE, data.table = FALSE
    ) %>%
    magrittr::set_colnames(c("pathway_id", "entrezgene_id")) %>%
    dplyr::mutate(
      # Strip the "<organism>:" and "path:" prefixes only at the start.
      entrezgene_id = as.character(sub(paste0("^", u, ":"), "", entrezgene_id)),
      pathway_id = sub("^path:", "", pathway_id)
    )

  # Query KEGG API for a table connecting pathway id and human-readable
  # pathway name.
  kegg_pathway2name <-
    data.table::fread(
      paste0("https://rest.kegg.jp/list/pathway/", u),
      sep = "\t", quote = "", header = FALSE, data.table = FALSE
    ) %>%
    magrittr::set_colnames(c("pathway_id", "pathway_name")) %>%
    dplyr::mutate(
      # Normalise the id the same way as in the link table so the join key
      # agrees whether or not the "path:" prefix is present.
      pathway_id = sub("^path:", "", pathway_id),
      # Remove only the LAST " - <organism>" suffix. The greedy group keeps
      # any earlier " - " that is part of the pathway title itself, e.g.
      # "Drug metabolism - cytochrome P450".
      pathway_name = sub("^(.*) - .*$", "\\1", pathway_name)
    )

  # Join everything and keep only rows that resolved to a named gene.
  pathway_genes <-
    dplyr::full_join(
      x = kegg_pathway2name, y = kegg_pathway2entrez, by = "pathway_id"
    ) %>%
    dplyr::left_join(x = ., y = genes, by = "entrezgene_id") %>%
    # biomaRt may report a missing symbol as an empty string rather than NA,
    # so test for both.
    dplyr::filter(!is.na(gene_id) & !is.na(gene_name) & nzchar(gene_name))

  pathway_genes
}, simplify = FALSE)
# Preview the first rows of each species table.
utils::head(kegg[["mouse"]])
utils::head(kegg[["human"]])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment