Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Yeast gene symbols to human homologs
import numpy as np
import pandas as pd
import requests
# Genes of interest: one spreadsheet column per yeast species.
df = pd.read_excel("yeast_genes.xlsx")
# Blank cells are dropped so each list holds only real gene names.
pom_genes = df["Schizosaccharomyces pombe"].dropna().tolist()
cer_genes = df["Saccharomyces cerevisiae"].dropna().tolist()
# Budding yeast (Saccharomyces cerevisiae) feature table from SGD.
_sgd_features = pd.read_table(
    "http://sgd-archive.yeastgenome.org/curation/chromosomal_feature/SGD_features.tab",
    header=None,
)
# The file has no header row: keep columns 0 and 4 and label them as the
# SGD identifier and the gene symbol.
cer_map = _sgd_features.iloc[:, [0, 4]].rename(columns={0: "SGDID", 4: "Symbol"})
# Restrict the table to the S. cerevisiae genes listed in the spreadsheet.
_is_requested = cer_map["Symbol"].isin(cer_genes)
cer_map = cer_map.loc[_is_requested].reset_index(drop=True)
# Get human homologs from Alliance of Genome Resources: one API request per
# S. cerevisiae gene, keyed by its SGD identifier.
payload = {
    "filter.stringency": "stringent",
    "taxonID": "NCBITaxon:9606",
}
res = []
# The context manager closes the session's pooled connections once every
# lookup has finished (or if one of them raises).
with requests.Session() as ss:
    for sgd_id in cer_map["SGDID"]:
        r = ss.get(
            f"https://www.alliancegenome.org/api/gene/SGD:{sgd_id}/homologs",
            params=payload,
            # Without a timeout a stalled connection would hang the script
            # indefinitely; 30 s is a generous per-request upper bound.
            timeout=30,
        )
        # Surface HTTP errors directly instead of failing later with an
        # opaque KeyError / JSON decoding error on the error response body.
        r.raise_for_status()
        # Keep only hits whose species is NCBITaxon:9606, even though the
        # request already asks for that taxon.
        homologs = [
            x["homologGene"]["symbol"]
            for x in r.json()["results"]
            if x["homologGene"]["species"]["taxonId"] == "NCBITaxon:9606"
        ]
        res.append(homologs)
# cer_map was re-indexed 0..n-1 earlier and res holds one list per row in
# iteration order, so the positional assignment lines up row for row.
cer_map["HumanSymbol"] = res
cer_map = cer_map[["Symbol", "HumanSymbol"]]
# Fission yeast (Schizosaccharomyces pombe): human ortholog table from PomBase.
pom_ortho = pd.read_table(
    "ftp://ftp.pombase.org/pombe/orthologs/human-orthologs.txt.gz",
    header=None,
)
pom_ortho.columns = ["Systematic", "HumanSymbol"]
# A cell may hold several symbols separated by "|": split them and give each
# symbol its own row.
pom_ortho["HumanSymbol"] = pom_ortho["HumanSymbol"].str.split("|")
pom_ortho = pom_ortho.explode("HumanSymbol")
# Drop rows whose symbol is missing or is the literal placeholder "NONE".
_has_symbol = pom_ortho["HumanSymbol"].notnull() & (pom_ortho["HumanSymbol"] != "NONE")
pom_ortho = pom_ortho[_has_symbol]
# PomBase identifier table: systematic ID, gene symbol and synonyms.
pom_map = pd.read_table(
    "ftp://ftp.pombase.org/pombe/names_and_identifiers/gene_IDs_names.tsv",
    skiprows=1,
    header=None,
)
pom_map.columns = ["Systematic", "Symbol", "Synonyms"]
pom_map = pom_map.drop(columns=["Synonyms"])
# NOTE(review): symbols are upper-cased before matching, so the spreadsheet is
# presumably expected to list S. pombe genes in upper case -- confirm.
pom_map["Symbol"] = pom_map["Symbol"].str.upper()
pom_map = pom_map[pom_map["Symbol"].isin(pom_genes)]
# Attach the human orthologs and collapse them into one list per gene.
pom_map = (
    pom_map.merge(pom_ortho, on="Systematic")
    .groupby(["Systematic", "Symbol"])["HumanSymbol"]
    .apply(list)
    .reset_index(name="HumanSymbol")
)
pom_map = pom_map[["Symbol", "HumanSymbol"]]
# Merge results: attach the per-species homolog lists to the input table,
# combine them into a single column and write the result to Excel.
def _as_list(value):
    """Return *value* if it is a list, otherwise an empty list.

    Rows without a match in a left merge carry a missing value (NaN) instead
    of a list. Checking the type avoids both the ``np.NaN`` alias (removed in
    NumPy 2.0) and an identity comparison against a float NaN.
    """
    return value if isinstance(value, list) else []


res = (
    df.merge(cer_map, how="left", left_on="Saccharomyces cerevisiae", right_on="Symbol")
    .drop("Symbol", axis=1)
    .merge(pom_map, how="left", left_on="Schizosaccharomyces pombe", right_on="Symbol")
    .drop("Symbol", axis=1)
)
# Both lookup tables contribute a "HumanSymbol" column, so after the second
# merge they appear as HumanSymbol_x (S. cerevisiae) and HumanSymbol_y
# (S. pombe). Union the two lists per row, de-duplicated and sorted.
res["HumanSymbol"] = [
    sorted(set(_as_list(cer_hits) + _as_list(pom_hits)))
    for cer_hits, pom_hits in zip(res["HumanSymbol_x"], res["HumanSymbol_y"])
]
res = res.drop(["HumanSymbol_x", "HumanSymbol_y"], axis=1)
res.rename(columns={"HumanSymbol": "Homo sapiens"}).to_excel(
    "yeast_table_1.xlsx", index=False
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment