Skip to content

Instantly share code, notes, and snippets.

@msalvadores
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save msalvadores/bc9c874f230ec8100ea6 to your computer and use it in GitHub Desktop.
Save msalvadores/bc9c874f230ec8100ea6 to your computer and use it in GitHub Desktop.
A simple script to traverse all GO classes
import requests
import time
import pdb
import json
import sys
import urllib
# Path of the tab-separated dump shared by both functions below:
# get_all_classes() writes it and test_coverage_parents() reads it back.
# Each line holds a class "@id" followed by the "@id" of each of its parents.
FILE_NAME = "ontology_ids_and_parents.txt"
# Sent as the `apikey` query parameter on every request to
# data.bioontology.org. The value here is a placeholder; replace it with
# your own key before running.
APIKEY = 'YOUR KEY'
def get_all_classes():
    """Download every class of the GO ontology together with its parents.

    Pages through ``http://data.bioontology.org/ontologies/GO/classes``
    (with ``include=parents``), starting at page 1 and following the
    ``nextPage`` value of each response until it is absent or ``None``.
    For every item in a page's ``collection`` one line is written to
    ``FILE_NAME``: the item's ``@id`` followed by the ``@id`` of each of
    its parents, tab-separated. An existing file is overwritten.
    Progress ("Page N  OK") is reported on stdout.

    Raises:
        ValueError: if a response body has no ``collection`` key.
    """
    url = "http://data.bioontology.org/ontologies/GO/classes"
    payload = {'apikey': APIKEY, 'page': 1, 'include': 'parents'}
    # The context manager closes the file even when a request or the JSON
    # parsing fails half-way through the traversal.
    with open(FILE_NAME, "w") as out:
        while payload['page'] is not None:
            # Show the page number before the (slow) request is issued.
            sys.stdout.write("Page %d" % payload['page'])
            sys.stdout.flush()
            r = requests.get(url, params=payload)
            sys.stdout.write("  OK\n")
            data = json.loads(r.text)
            if 'collection' not in data:
                # Fail loudly instead of dropping into an interactive
                # debugger, which stalls unattended runs.
                raise ValueError(
                    "no 'collection' in response for page %s of %s "
                    "(HTTP status %s)"
                    % (payload['page'], url, r.status_code))
            for item in data['collection']:
                ids = [item["@id"]]
                ids.extend(parent["@id"] for parent in item['parents'])
                out.write("\t".join(ids) + "\n")
            # A missing or null 'nextPage' marks the last page.
            payload['page'] = data.get('nextPage')
def test_coverage_parents():
    """Probe the API for parents that were never listed as classes.

    Reads ``FILE_NAME`` (tab-separated; first column is a class id, the
    remaining columns are its parent ids), computes the parent ids that
    never occur in the first column, and requests each of them from
    ``http://data.bioontology.org/ontologies/GO/classes/<quoted id>``.
    For every such id it prints the id and the response status to stdout,
    pausing one second between requests.
    """
    classes = set()
    parents = set()
    with open(FILE_NAME, "r") as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # A blank line carries no class id; do not record "".
                continue
            fields = line.split("\t")
            classes.add(fields[0])
            parents.update(fields[1:])

    missing = parents.difference(classes)
    payload = {'apikey': APIKEY}  # identical for every request
    for cls in missing:
        sys.stdout.write("Testing  %s\n" % cls)
        # safe='' also percent-encodes "/" so that the id stays a single
        # path segment of the request URL.
        quoted = urllib.quote(cls, '')
        cls_call = "http://data.bioontology.org/ontologies/GO/classes/%s" % quoted
        r = requests.get(cls_call, params=payload)
        time.sleep(1)
        # Prefer the server's "Status" header when it is sent; it is not a
        # standard HTTP header, so fall back to the numeric status code.
        status = r.headers.get('status', r.status_code)
        sys.stdout.write("\t%s\n" % status)
# Script entry: these run whenever the file is executed (there is no
# __main__ guard). Order matters: the coverage check reads the file that
# get_all_classes() has just written.
get_all_classes()
test_coverage_parents()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment