Skip to content

Instantly share code, notes, and snippets.

@dmyersturnbull
Last active March 22, 2016 19:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dmyersturnbull/2a64bb0728453eca9c72 to your computer and use it in GitHub Desktop.
Save dmyersturnbull/2a64bb0728453eca9c72 to your computer and use it in GitHub Desktop.
Get all of the GO terms at the specified level and type associated with a UniProt ID. Uses Python 3.5 Typing.
# Requires https://gist.github.com/dmyersturnbull/efe32052bf4cf06df915
import pandas as pd
from typing import Iterable, Union, Mapping
from goatools import obo_parser # uses https://github.com/tanghaibao/goatools
from goatools.obo_parser import GOTerm # NOT the same as FlatGoTerm, which has no knowledge of hierarchy
import os  # needed for the existence check below; the file's import block never brings it in

# Download the Gene Ontology OBO file only when no local copy exists yet,
# so repeated runs reuse the file on disk instead of hitting the network.
if not os.path.exists('gene_ontology.1_2.obo'):
    import wget  # imported lazily: only required when a download is actually performed
    wget.download('http://www.geneontology.org/ontology/obo_format_1_2/gene_ontology.1_2.obo')
    print("Done.")
# Parsed ontology DAG shared by the whole module; this will be used in query_obo_term
obo = obo_parser.GODag('gene_ontology.1_2.obo')
def query_obo_term(term_id: str) -> GOTerm:
    """Queries a term through the global obo.

    This function wraps the call to raise a ValueError if the term is not found;
    otherwise the underlying lookup only logs a warning and hands back None.

    Arguments:
        term_id: a GO identifier in the form 'GO:0007344'
    Returns:
        the object returned by obo.query_term for that identifier
    Raises:
        ValueError: if obo.query_term returns None for term_id
    """
    term = obo.query_term(term_id)
    if term is None:
        # Report the identifier that was requested; the lookup result itself
        # is None here and would make the message useless.
        raise ValueError('Term ID {} not found'.format(term_id))
    return term
def get_ancestors_of_go_term(term_id: str, level: int) -> Iterable[GOTerm]:
    """
    From a GO term in the form 'GO:0007344', returns a set of ancestor GOTerm objects at the specified level.

    The walk follows each term's ``parents`` collection (is-a relationships, per the
    original author's note). The starting term itself is included in the result when
    its own level equals the requested one.
    Note that the level is the minimum number of steps to the root.

    Arguments:
        term_id: a GO identifier in the form 'GO:0007344'
        level: starting at 0 (root)
    Returns:
        a set of the terms reachable through ``parents`` whose ``level`` equals ``level``
    Raises:
        ValueError: propagated from query_obo_term when term_id is unknown
    """
    matches = set()
    # The ontology is a DAG, so one ancestor can be reached along many paths;
    # remembering what was already expanded keeps the walk linear in the number
    # of ancestors instead of re-walking shared sub-graphs once per path.
    visited = set()
    pending = [query_obo_term(term_id)]
    while pending:
        term = pending.pop()
        if term in visited:
            continue
        visited.add(term)
        if term.level == level:
            matches.add(term)
        # No separate "has parent" test is needed: a term without parents simply
        # contributes nothing here. (The previous check referenced
        # ``term.has_parent`` without calling it, so it never filtered anything.)
        pending.extend(term.parents)
    return matches
def go_term_ancestors_for_uniprot_id(uniprot_id: str, level: int, kinds_allowed: Iterable[str] = ('P', 'F', 'C')) -> Iterable[GOTerm]:
    """Gets the GO terms associated with a UniProt ID and returns a set of their ancestors at the specified level.

    The traversal is restricted to is-a relationships.
    Note that the level is the minimum number of steps to the root.

    Arguments:
        uniprot_id: the UniProt accession to look up, e.g. 'P42681'
        level: starting at 0 (root)
        kinds_allowed: any iterable containing any combination of 'P', 'F', or 'C'
    Returns:
        a set of ancestor GOTerm objects; an empty set when kinds_allowed is empty
    """
    # Materialise once: this accepts any iterable (including one-shot iterators,
    # which have no len() and would be drained by the first membership test)
    # and gives constant-time membership checks below.
    allowed = frozenset(kinds_allowed)
    if not allowed:
        # Same container type as the normal return path.
        return set()
    ancestor_terms = set()
    # go_terms_for_uniprot_id comes from the companion gist named in the file header;
    # its items are assumed to expose `.kind` and `.ID` as used here.
    for term in go_terms_for_uniprot_id(uniprot_id):
        if term.kind in allowed:
            ancestor_terms.update(get_ancestors_of_go_term(term.ID, level))
    return ancestor_terms
def go_term_ancestors_for_uniprot_id_as_df(uniprot_id: str, level: int, kinds_allowed: Iterable[str] = ('P', 'F', 'C')) -> pd.DataFrame:
    """See go_term_ancestors_for_uniprot_id. Returns a Pandas DataFrame indexed by ID with a single 'name' column.

    Arguments:
        uniprot_id: the UniProt accession to look up, e.g. 'P42681'
        level: starting at 0 (root)
        kinds_allowed: any iterable containing any combination of 'P', 'F', or 'C'
    Returns:
        a DataFrame with one row per ancestor term, indexed by 'ID' and sorted by it
    """
    ancestors = go_term_ancestors_for_uniprot_id(uniprot_id, level, kinds_allowed)
    # Build the frame in one shot instead of appending row by row, which
    # re-allocates on every insertion. Only the documented columns are emitted.
    rows = [(term.id, term.name) for term in ancestors]
    df = pd.DataFrame(rows, columns=['ID', 'name'])
    # The ancestors arrive as a set with no defined order; sorting by ID makes
    # the output reproducible from run to run.
    return df.set_index('ID').sort_index()
# Example: go_term_ancestors_for_uniprot_id_as_df('P42681', 2)
@dmyersturnbull
Copy link
Author

Result from the example (P42681; only first 5 rows)

screen shot 2016-03-22 at 12 22 11 pm

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment