Created
February 11, 2021 13:39
-
-
Save caodac/e192546eaa8b40c8482ce4014f97e2e1 to your computer and use it in GitHub Desktop.
Count the number of leaf nodes for each HPO category under Phenotypic Abnormality (HP:0000118)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from neo4j import GraphDatabase | |
from jinja2 import Template | |
import json | |
# Cypher text rendered through Jinja2; ``{{ leaf }}`` is replaced with the
# notation of a single term.  Starting at the DATA node ``d`` with that
# notation, the pattern follows its PAYLOAD to an S_HP node ``n``, climbs
# 0..11 R_subClassOf edges to a node ``m``, and requires ``m`` to be a direct
# subclass of the node whose payload notation is 'HP:0000118'.  Every edge on
# the way must have a ``source`` equal to, or containing, ``n.source``.
# After ``with d,m`` the name ``z`` is bound afresh to the payload of ``m``,
# so the returned ID/LABEL columns describe ``m``, not HP:0000118 itself.
QUERY = """
match p=(d:DATA)-[:PAYLOAD]->(n:S_HP)-[e:R_subClassOf*0..11]->(m:S_HP)-[e2:R_subClassOf]->(:S_HP)<-[:PAYLOAD]-(z)
 where z.notation = 'HP:0000118'
 and d.notation='{{ leaf }}'
 and all(x in e where x.source=n.source or n.source in x.source)
 and (e2.source=n.source or n.source in e2.source)
with d,m match (m)<-[:PAYLOAD]-(z)
return distinct z.notation as ID, z.label as LABEL
"""
# Bolt endpoint of the graph database this script queries.
uri = "bolt://disease.ncats.io:80"
# Single driver shared by every query below; it authenticates as user
# "neo4j" with an empty password.
driver = GraphDatabase.driver(uri, auth=("neo4j", ""))
# Jinja2 template built once from QUERY and rendered once per term.
template = Template(QUERY)
def run_cypher(tx, query):
    """Run *query* on *tx* and collect the ID and LABEL of every row.

    Args:
        tx: Object exposing ``run(query)`` whose result can be iterated to
            yield rows indexable by column name.
        query: Cypher text whose result has ``ID`` and ``LABEL`` columns.

    Returns:
        A list of ``(ID, LABEL)`` tuples in the order the rows were yielded;
        an empty list when the query yields no rows.
    """
    # Build the list eagerly so the caller receives plain tuples rather than
    # a result object tied to ``tx``.
    return [(row['ID'], row['LABEL']) for row in tx.run(query)]
def get_leafs():
    """Fetch the terms that no other node is a subclass of.

    The query selects S_HP nodes that have no incoming R_subClassOf
    relationship and carry none of the TRANSIENT, AnnotationProperty or
    ObjectProperty labels, keeping only payloads whose notation matches
    ``HP:.*``.

    Returns:
        Whatever ``run_cypher`` produces for the query: ``(ID, LABEL)``
        tuples taken from the payload's notation and label, ordered by
        notation in descending order.
    """
    leaf_query = """
        match p=(d)-[:PAYLOAD]->(n:S_HP)
        where not (n)<-[:R_subClassOf]-()
          and not n:TRANSIENT
          and not n:AnnotationProperty
          and not n:ObjectProperty
          and d.notation =~ 'HP:.*'
        return d.notation as ID, d.label as LABEL
        order by d.notation desc
    """
    with driver.session() as session:
        return session.read_transaction(run_cypher, leaf_query)
def get_categories(leaf):
    """Look up the categories that the term *leaf* falls under.

    Args:
        leaf: Notation substituted for ``{{ leaf }}`` in the module-level
            query template.

    Returns:
        The result of ``run_cypher`` for the rendered query, executed in a
        read transaction on a fresh session.
    """
    with driver.session() as session:
        rendered = template.render(leaf=leaf)
        found = session.read_transaction(run_cypher, rendered)
    return found
if __name__ == '__main__':
    # Maps category ID -> {'label': category label, 'count': number of leaf
    # terms for which get_categories() returned that category}.
    categories = {}
    try:
        # One category lookup is issued per leaf term.
        for leaf_id, _leaf_label in get_leafs():
            for cat_id, cat_label in get_categories(leaf_id):
                entry = categories.setdefault(
                    cat_id, {'label': cat_label, 'count': 0}
                )
                entry['count'] += 1
    finally:
        # Release the driver's connections even if a query fails.
        driver.close()
    print(json.dumps(categories, indent=2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script can be replaced with this cypher query!