Created
October 12, 2021 15:16
-
-
Save dustine32/4bf7d86e889d47f3be2be8c3127bcd58 to your computer and use it in GitHub Desktop.
Generate the PANTHER slim given a list of IBDs in GAF format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ontobio.ontol_factory import OntologyFactory | |
from ontobio.assoc_factory import AssociationSetFactory | |
from ontobio.io.ontol_renderers import GraphRenderer, OboFormatGraphRenderer | |
from ontobio.io.gafparser import GafParser | |
from ontobio.util.go_utils import GoAspector | |
import argparse | |
import json | |
import os | |
parser = argparse.ArgumentParser() | |
parser.add_argument('-c', '--usage_count_constraint') | |
parser.add_argument('-g', '--gaf_file') | |
parser.add_argument('-o', '--outfile') | |
parser.add_argument('-u', '--use_cache', action='store_const', const=True) | |
parser.add_argument('-w', '--ontology_file') | |
USAGE_COUNT_CONSTRAINT = 5 | |
ONLY_SHARED_ANCESTORS = True | |
RELATIONS = ["subClassOf","BFO:0000050"] | |
GAF_FILE = "/Users/ebertdu/Downloads/ancestor_node_association.paint.gaf" | |
OUTFILE = "panther_slim_shared.obo" | |
GO_ASPECT_LKP = {} | |
def get_aspect(go_term, go_ontology): | |
global GO_ASPECT_LKP | |
aspect = GO_ASPECT_LKP.get(go_term) | |
if aspect is None: | |
aspect = GoAspector(go_ontology).go_aspect(go_term) | |
GO_ASPECT_LKP[go_term] = aspect | |
return aspect | |
def dump_to_json(filename, terms): | |
with open(filename, "w+") as f: | |
f.write(json.dumps(terms)) | |
def get_from_json(json_file): | |
with open(json_file) as f: | |
terms = json.loads(f.read()) | |
print("File '{}' used to load term count dictionary - {} keys loaded".format(json_file, len(terms))) | |
return terms | |
def term_usage_count(term, associations): | |
usage_count = 0 | |
for a in associations: | |
if a["object"]["id"] == term: | |
usage_count += 1 | |
return usage_count | |
def get_common_terms(ontology, gaf_file, usage_count_constraint=5, regen_cache=True): | |
terms = {} | |
tmp_terms_fname = "/tmp/{}.json".format(os.path.basename(gaf_file)) | |
assocs = GafParser().parse(gaf_file, skipheader=True) | |
if not regen_cache and os.path.isfile(tmp_terms_fname): | |
terms = get_from_json(tmp_terms_fname) | |
else: | |
for a in assocs: | |
# These counts will be overwritten, just initializing term keys at this point | |
if a["object"]["id"] not in terms: | |
terms[a["object"]["id"]] = 1 | |
else: | |
terms[a["object"]["id"]] += 1 | |
cached_counts = {} | |
prog_counter = 0 | |
print("{} terms to go through".format(len(terms))) | |
for t in terms: | |
prog_counter += 1 | |
# print("Currently on {}".format(prog_counter)) | |
if t in cached_counts: | |
t_count = cached_counts[t] | |
else: | |
t_count = term_usage_count(t, assocs) | |
cached_counts[t] = t_count | |
desc_terms = ontology.subontology(ontology.descendants(t), relations=RELATIONS).nodes() | |
t_asp = get_aspect(t, ontology) | |
for dt in desc_terms: | |
dt_asp = get_aspect(dt, ontology) | |
if dt_asp != t_asp: # Don't cross the aspects! | |
continue | |
if dt in cached_counts: | |
d_count = cached_counts[dt] | |
else: | |
d_count = term_usage_count(dt, assocs) | |
cached_counts[dt] = d_count | |
t_count += d_count | |
terms[t] = t_count | |
# Cache terms | |
dump_to_json(tmp_terms_fname, terms) | |
common_terms = {} | |
for t in terms: | |
if terms[t] >= usage_count_constraint: | |
common_terms[t] = terms[t] | |
return common_terms | |
def fill_in_relations(subontology, ontology_orig): | |
for term in subontology.nodes(): | |
for ancestor in subontology.nodes(): | |
if term == ancestor: | |
continue | |
sub_ancestors = subontology.subontology(subontology.ancestors(term) + [term]) | |
orig_ancestors = ontology_orig.subontology(ontology_orig.ancestors(term) + [term]) | |
for rel in ['subClassOf','BFO:0000050']: | |
rel_sub_ancestors = sub_ancestors.ancestors(term, relations=[rel]) | |
rel_orig_ancestors = orig_ancestors.ancestors(term, relations=[rel]) | |
if ancestor not in rel_sub_ancestors and ancestor in rel_orig_ancestors: | |
subontology.graph.add_edge(ancestor, term, pred=rel) | |
return subontology | |
if __name__ == "__main__": | |
args = parser.parse_args() | |
USAGE_COUNT_CONSTRAINT = int(args.usage_count_constraint) | |
GAF_FILE = args.gaf_file | |
OUTFILE = args.outfile | |
regen_cache = None | |
regen_cache = True | |
if args.use_cache: | |
regen_cache = False | |
# ont = OntologyFactory().create("/Users/ebertdu/Downloads/go.owl") | |
ont = OntologyFactory().create(args.ontology_file) | |
# aset = AssociationSetFactory().create(ont, file=GAF_FILE) | |
common_terms = get_common_terms(ont, GAF_FILE, USAGE_COUNT_CONSTRAINT, regen_cache) | |
print("Grabbed {} common terms".format(len(common_terms))) | |
all_terms = [] | |
term_to_ancestors = {} | |
for t in common_terms: | |
subont = ont.subontology(ont.ancestors(t), relations=RELATIONS) | |
term_to_ancestors[t] = subont.nodes() # Keep ancestor list in case we want to only include common ancestors | |
for n in subont.nodes(): | |
if n not in all_terms: | |
all_terms.append(n) | |
print("Grabbed all ancestors") | |
if ONLY_SHARED_ANCESTORS: | |
shared_ancestors = [] | |
for t in common_terms: | |
for anc in term_to_ancestors[t]: | |
## x-y term matrix checking for shared ancestors in multiple ancestor sets | |
for other_t in common_terms: | |
if other_t == t: | |
continue | |
else: | |
if anc in term_to_ancestors[other_t]: | |
shared_ancestors.append(anc) | |
all_terms = set(shared_ancestors + list(common_terms.keys())) | |
print("Filtered for shared ancestors only") | |
print("{} terms included in panther_slim".format(len(all_terms))) | |
hierarchy_ont = ont.subontology(all_terms, relations=RELATIONS) | |
# hierarchy_ont = fill_in_relations(hierarchy_ont, ont) | |
with open(OUTFILE, "w+") as term_file: | |
for t in all_terms: | |
term_file.write(t + "\n") | |
# Write out obo file from here instead of owltools | |
# renderer = GraphRenderer.create("obo") | |
# renderer.outfile = OUTFILE | |
# renderer.write(hierarchy_ont) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment