Skip to content

Instantly share code, notes, and snippets.

@alexlenail
Last active December 23, 2022 13:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexlenail/35c2402cb630ffc04c209d7d6acf4039 to your computer and use it in GitHub Desktop.
Save alexlenail/35c2402cb630ffc04c209d7d6acf4039 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from gprofiler import GProfiler
# Module-level g:Profiler client used by gprofiler_orthologs below.
# return_dataframe=True is required by that function, which treats the result
# of gp.orth(...) as a pandas DataFrame (.replace, .loc, .groupby).
gp = GProfiler(return_dataframe=True)
def gprofiler_orthologs(query, human_to_mouse=False, mouse_to_human=False, organism='mmusculus', target='hsapiens', returnall=False):
    """Map gene identifiers to their orthologs through the module-level ``gp`` client.

    Parameters
    ----------
    query : str, pandas.Index, pandas.Series or iterable
        Gene identifiers to look up. A single string is treated as one
        gene. Null entries (None / NaN / pd.NA) and the literal string
        ``'nan'`` are dropped and duplicates are collapsed before the
        lookup.
    human_to_mouse : bool
        If true, force ``organism='hsapiens'`` and ``target='mmusculus'``.
        Takes precedence over ``mouse_to_human`` and over explicitly
        passed ``organism`` / ``target``.
    mouse_to_human : bool
        If true (and ``human_to_mouse`` is false), force
        ``organism='mmusculus'`` and ``target='hsapiens'``.
    organism, target : str
        Organism identifiers forwarded to ``gp.orth`` when neither
        shortcut flag is set.
    returnall : bool
        If true, return the whole table produced by ``gp.orth`` (with
        ``'N/A'`` replaced by NaN) instead of the filtered view.

    Returns
    -------
    pandas.DataFrame
        With ``returnall=True``: the unfiltered result table.
        Otherwise: rows that have an ``ortholog_ensg`` and
        ``n_result == 1``, indexed by the input identifier (index named
        ``'query'``) with columns ``converted``, ``ortholog_ensg`` and
        ``name``.

    Notes
    -----
    Prints the raw result table and summary counts (unique, missing,
    multi-mappers) to stdout. The counts are reported relative to the
    number of entries in ``query`` as given, not the de-duplicated list.
    """
    # Normalise every supported container to a plain list so that len() and
    # iteration behave uniformly (sets and generators break np.unique / len).
    if isinstance(query, str):
        query = [query]
    elif isinstance(query, pd.Index):
        query = query.tolist()
    elif isinstance(query, pd.Series):
        query = query.values.tolist()
    else:
        query = list(query)

    # Remove nulls before np.unique: sorting None / pd.NA against strings
    # raises TypeError inside numpy.
    present = [x for x in query if not pd.isnull(x)]
    # The 'nan' string filter is kept for inputs that were stringified
    # upstream (e.g. via astype(str)) and so carry NaN as text.
    q = [x for x in np.unique(present).tolist() if str(x) != 'nan']
    if len(q) != len(query):
        print(f'{len(q)} unique of {len(query)}')

    # Shortcut flags override organism/target; human_to_mouse wins if both
    # flags are set.
    if human_to_mouse:
        organism, target = 'hsapiens', 'mmusculus'
    elif mouse_to_human:
        organism, target = 'mmusculus', 'hsapiens'

    df = gp.orth(organism=organism, query=q, target=target).replace('N/A', np.nan)
    print(df)

    n_missing = int(df['ortholog_ensg'].isnull().sum())
    print(f'{n_missing} missing out of {len(query)}')

    # Inputs that appear on more than one result row.
    hits_per_gene = df.groupby('incoming').size()
    multimappers = hits_per_gene[hits_per_gene > 1].index.tolist()
    print(f'{len(multimappers)} multi-mappers out of {len(query)}')

    if returnall:
        return df

    df = df.dropna(subset=['ortholog_ensg'])
    # NOTE(review): presumably n_result is the rank of a hit among one
    # input's orthologs, so this keeps only the first hit per gene --
    # confirm against the g:Profiler documentation.
    df = df[df.n_result == 1]
    return df.set_index('incoming')[['converted', 'ortholog_ensg', 'name']].rename_axis('query')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment