Skip to content

Instantly share code, notes, and snippets.

@saketkc
Created March 7, 2024 05:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save saketkc/11a12bfd202f0b4570f76eabad4d0e4e to your computer and use it in GitHub Desktop.
Save saketkc/11a12bfd202f0b4570f76eabad4d0e4e to your computer and use it in GitHub Desktop.
get_human_orthologs.R
library(biomaRt)
# Species prefixes to fetch human orthologs for. Each entry is pasted into the
# homolog attribute names (<species>_homolog_*) and into the filter name
# (with_<species>_homolog) used by the query loop in this script.
# Each species is listed exactly once so the loop neither repeats a query nor
# overwrites an output file it has already written.
species_tx2gene_map <- c(
  "btaurus",
  "mauratus",
  "mmulatta",
  "ggallus",
  "sscrofa",
  "mmusculus",
  "drerio",
  "rnorvegicus",
  "pabelii",
  "mdomestica",
  "ptroglodytes",
  "mpfuro",
  "oanatinus",
  "ggorilla",
  "ppaniscus"
)
# Homolog attribute suffixes. The query loop prepends a species prefix to each
# of these (paste0(species, attributes_base)) and keeps only the resulting
# names that the mart actually lists, so suffixes missing for a species are
# dropped rather than causing an error.
attributes_base <- c(
  "_homolog_ensembl_gene",
  "_homolog_associated_gene_name",
  "_homolog_ensembl_peptide",
  "_homolog_chromosome",
  "_homolog_chrom_start",
  "_homolog_chrom_end",
  "_homolog_canonical_transcript_protein",
  "_homolog_subtype",
  "_homolog_orthology_type",
  "_homolog_perc_id",
  "_homolog_perc_id_r1",
  "_homolog_goc_score",
  "_homolog_wga_coverage",
  "_homolog_dn",
  "_homolog_ds",
  "_homolog_orthology_confidence"
)
# Connect to the Ensembl mart using the human gene dataset; every query below
# is issued against this mart.
mart <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Attribute and filter names the mart offers, fetched once and reused for
# every species in the loop.
z <- listAttributes(mart)
available_filters <- listFilters(mart)$name

# write.table() does not create missing directories, so make sure the output
# location exists before the first file is written.
output_dir <- file.path("data", "orthologs")
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

# For each species: query the genes selected by the with_<species>_homolog
# filter together with the available homolog attributes, strip the species
# prefix from the column names, and write one TSV per species.
for (species in species_tx2gene_map) {
  filter <- paste("with", species, "homolog", sep = "_")
  if (!(filter %in% available_filters)) {
    # An unknown filter would make getBM() fail and abort the remaining
    # species; report it and carry on instead.
    warning(
      "Skipping '", species, "': filter '", filter,
      "' is not available in this mart",
      call. = FALSE
    )
    next
  }

  attributes <- c(
    "ensembl_gene_id",
    "external_gene_name",
    paste0(species, attributes_base)
  )
  # Keep only the attributes the mart lists, so a missing one does not make
  # the query fail.
  attributes <- intersect(attributes, z$name)

  orth <- getBM(
    attributes = attributes,
    filters = filter,
    values = TRUE,
    mart = mart,
    bmHeader = FALSE
  )
  df <- as.data.frame(orth)

  # Drop the leading "<species>_" so all output files share the same column
  # names regardless of species.
  colnames(df) <- sub(paste0("^", species, "_"), "", colnames(df))

  write.table(
    df,
    file = file.path(output_dir, paste0("human_", species, ".tsv")),
    row.names = FALSE,
    col.names = TRUE,
    quote = FALSE,
    sep = "\t"
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment