Last active
March 4, 2023 10:34
-
-
Save Close-your-eyes/803c080e400e5626a0f4f68fa87b517b to your computer and use it in GitHub Desktop.
Produce a conversion table of human genes and their mouse orthologs using the biomaRt package
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Initially based on orthologsBioMART::findOrthologs from
## https://github.com/vitkl/orthologsBioMART,
## boiled down to essentially reproduce and extend this table:
## ProjecTILs::Hs2Mm.convert.table

# The script pipes with %>% but attaches no package, so bind the operator
# explicitly (dplyr re-exports it); otherwise a fresh session fails with
# "could not find function %>%".
`%>%` <- dplyr::`%>%`

# Human features (HGNC symbols) to find orthologs for, e.g.
# rownames(Seurat::GetAssayData(SO, slot = "data", assay = "RNA")).
# Placeholder: fill this in before running the queries below.
features <- c()

# Use biomart to map orthologs; all queries go through the human dataset.
hs_mart <- biomaRt::useMart("ensembl", dataset = "hsapiens_gene_ensembl")
# mm_mart <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl") # not needed
# Map every requested HGNC symbol to its Ensembl gene ID(s).
hs_ids <-
  biomaRt::getBM(
    mart = hs_mart,
    filters = "hgnc_symbol",
    values = features,
    attributes = c("hgnc_symbol", "ensembl_gene_id")
  ) %>%
  tibble::as_tibble() %>%
  # columns are renamed by position: 1 = symbol, 2 = Ensembl gene ID
  dplyr::rename(Gene.HS = 1, Gene.stable.ID.HS = 2) %>%
  dplyr::distinct() %>%
  # keep only rows where both fields are non-blank after trimming
  dplyr::filter(trimws(Gene.HS) != "" & trimws(Gene.stable.ID.HS) != "")
# Fetch the mouse homologs (Ensembl ID and gene name) of the human Ensembl
# gene IDs collected in hs_ids. The query runs against the human mart, which
# exposes the mouse homolog attributes.
mm_orth <-
  biomaRt::getBM(
    attributes = c(
      "ensembl_gene_id",
      "mmusculus_homolog_ensembl_gene",
      "mmusculus_homolog_associated_gene_name"
    ),
    filters = "ensembl_gene_id",
    # [[ ]] extracts the column as a plain vector; unique() avoids sending
    # the same ID twice (the result is de-duplicated below anyway)
    values = unique(hs_ids[["Gene.stable.ID.HS"]]),
    mart = hs_mart
  ) %>%
  tibble::as_tibble() %>%
  # columns are renamed by position, in the order of `attributes` above
  dplyr::rename(
    "Gene.stable.ID.HS" = 1,
    "Gene.stable.ID.MM" = 2,
    "Gene.MM" = 3
  ) %>%
  dplyr::distinct() %>%
  # drop human genes for which no mouse homolog was returned
  dplyr::filter(trimws(Gene.MM) != "", trimws(Gene.stable.ID.MM) != "")
# biomaRt-only conversion table: one row per (human symbol, mouse symbol) pair.
# NOTE: this variable shares its name with base::table(); calls such as
# table(x) still resolve to the function, but the name is easy to misread.
# It is not used by ortholog_table below, which rebuilds the same join.
table <-
  hs_ids %>%
  # join key stated explicitly instead of relying on the shared column name
  dplyr::left_join(mm_orth, by = "Gene.stable.ID.HS") %>%
  # rows without a mouse match carry NA after the left join
  tidyr::drop_na() %>%
  dplyr::select(-Gene.stable.ID.MM) %>%
  # keep an arbitrary Gene.stable.ID.HS for each match
  dplyr::distinct(Gene.HS, Gene.MM, .keep_all = TRUE)
# Expand the comma-separated alias columns of ProjecTILs::Hs2Mm.convert.table
# into long format: one row per (mouse alias, human alias) combination.
pt_table <-
  ProjecTILs::Hs2Mm.convert.table %>%
  # split both alias columns into list-columns ...
  dplyr::mutate(
    Alt.symbol = stringr::str_split(Alt.symbol, ","),
    Alt.symbol.HS = stringr::str_split(Alt.symbol.HS, ",")
  ) %>%
  # ... and unnest them one after the other (yields every combination)
  tidyr::unnest(Alt.symbol) %>%
  tidyr::unnest(Alt.symbol.HS) %>%
  # the Ensembl ID is kept only where the human alias equals the primary symbol
  dplyr::mutate(
    Gene.stable.ID.HS = ifelse(Alt.symbol.HS == Gene.HS, Gene.stable.ID.HS, NA)
  ) %>%
  # the aliases take over the names of the primary symbol columns
  dplyr::select(-c(Gene.HS, Gene.MM)) %>%
  dplyr::rename(Gene.HS = Alt.symbol.HS, Gene.MM = Alt.symbol) %>%
  tibble::as_tibble()
# Combine the biomaRt orthologs with the ProjecTILs alias table and
# de-duplicate on the (human symbol, mouse symbol) pair.
ortholog_table <-
  hs_ids %>%
  # join key stated explicitly instead of relying on the shared column name
  dplyr::left_join(mm_orth, by = "Gene.stable.ID.HS") %>%
  dplyr::select(-Gene.stable.ID.MM) %>%
  dplyr::bind_rows(pt_table) %>%
  # Require only the two symbols to be present. pt_table deliberately sets
  # Gene.stable.ID.HS to NA on alias rows, so a blanket drop_na() would
  # silently discard every human alias again.
  tidyr::drop_na(Gene.HS, Gene.MM) %>%
  # keep an arbitrary Gene.stable.ID.HS for each match; biomaRt rows come
  # first, so their (non-NA) ID wins over an alias row of the same pair
  dplyr::distinct(Gene.HS, Gene.MM, .keep_all = TRUE)

# FYI: how often each human symbol occurs, i.e. which ones map to several
# mouse symbols. Namespace-qualified because a variable named `table` exists.
multi_entries <- utils::stack(base::table(ortholog_table$Gene.HS))

# Next steps: filter ortholog_table for features that actually exist in the
# reference Seurat object (e.g. mouse) and in your own object (e.g. human),
# then pass it to ProjecTILs::make.projection().
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment