Skip to content

Instantly share code, notes, and snippets.

@toniher
Created February 7, 2015 12:48
Show Gist options
  • Save toniher/2db1522dbfc5fe58b52c to your computer and use it in GitHub Desktop.
Save toniher/2db1522dbfc5fe58b52c to your computer and use it in GitHub Desktop.
py2neo 1.6 version of a script that requests the distance and the common parent of two nodes from an NCBI Taxonomy Neo4j database
#!/usr/bin/env python
from py2neo import neo4j
import sys
import logging
import argparse
from inspect import getmembers
from pprint import pprint
# Command-line interface: two positional NCBI identifiers.
parser = argparse.ArgumentParser()
parser.add_argument("acc1", help="First NCBI ID.")
parser.add_argument("acc2", help="Second NCBI ID.")
opts = parser.parse_args()

logging.basicConfig(level=logging.INFO)

# No URI is passed, so py2neo falls back to its default server location.
db = neo4j.GraphDatabaseService()

# Distances above this value are treated as "no match" by
# crawler_distance_node, which then returns 0.
max_distance = 1000
def get_distance_node( node1, node2, link='has_parent' ):
    """Return the length of the shortest path between two nodes.

    Runs a Cypher ``shortestPath`` query between the nodes with internal
    ids ``node1`` and ``node2``. The pattern ``n-[*]-m`` matches
    relationships of any type in either direction.

    Args:
        node1: Internal Neo4j id of the first node.
        node2: Internal Neo4j id of the second node.
        link: Accepted for signature compatibility with the other helpers;
            it is not used in the query.

    Returns:
        The number of relationships on the shortest path, taken from the
        last record the query yields, or ``max_distance + 1`` when the
        query yields no record at all.
    """
    query = neo4j.CypherQuery(db, "START n=node("+str(node1)+"), m=node("+str(node2)+") MATCH p=shortestPath(n-[*]-m) RETURN length(relationships(p)) as distance")

    # "No path" sentinel. It has to exceed max_distance, otherwise the
    # caller's `distance > max_distance` check can never fire and the
    # crawl continues on unconnected nodes (the previous value, 100, was
    # below the 1000 threshold).
    distance = max_distance + 1

    for record in query.stream():
        distance = record[0]

    return distance
def get_parent_node(node, link='has_parent'):
    """Return the internal id of the TAXID node that `node` points to.

    Follows any outgoing relationship from the node with internal id
    `node` to a node labelled ``TAXID``. `link` is accepted but not used
    in the query.

    Returns:
        The internal id of the last matching node, or 0 when the query
        yields no record.
    """
    cypher = "START n=node(%s) MATCH (n-[]->(m:TAXID)) RETURN m" % (node,)
    found = 0
    for row in neo4j.CypherQuery(db, cypher).stream():
        found = row[0]._id
        # NOTE(review): the source lost its indentation; this debug print is
        # assumed to sit inside the loop - confirm against the original.
        print("ID: "+str(found))
    return found
def parent_distance_node(node1, node2, link='has_parent'):
    """Return the distance between the parent of `node1` and `node2`.

    The parent is looked up with get_parent_node and the distance is then
    measured with get_distance_node; `link` is forwarded to both.
    """
    return get_distance_node(get_parent_node(node1, link), node2, link)
def crawler_distance_node( node1, node2 ):
    """Climb from two nodes towards the node where their lineages meet.

    Each step measures the distance between the two nodes and then
    replaces one of them by its parent, recursing until the nodes are
    equal or directly linked.

    Args:
        node1: Internal Neo4j id of the first node.
        node2: Internal Neo4j id of the second node.

    Returns:
        The internal id of the meeting node, or 0 when the distance
        exceeds ``max_distance`` or when the nodes are one step apart but
        neither is the parent of the other.
    """
    print("ITER")
    linktype = 'has_parent'
    distance = get_distance_node( node1, node2, linktype )
    print("DISTANCE: "+str(distance))

    if distance > max_distance:
        return 0

    if distance == 0:
        # Both ids refer to the same node.
        return node1

    if distance == 1:
        # Each parent is looked up once and reused for both the debug
        # output and the comparison, instead of re-querying the database.
        parent1 = get_parent_node(node1, linktype)
        print(parent1)
        parent2 = get_parent_node(node2, linktype)
        print(parent2)
        if node2 == parent1:
            return node2
        if node1 == parent2:
            return node1
        return 0

    # Decide which side to climb: if stepping up from node1 does not bring
    # it closer to node2, step up from node2 instead. The parent of node1
    # is fetched once and reused for the recursive call.
    parent1 = get_parent_node(node1, linktype)
    if get_distance_node( parent1, node2, linktype ) >= distance:
        parent2 = get_parent_node(node2, linktype)
        return crawler_distance_node( node1, parent2 )
    return crawler_distance_node( parent1, node2 )
# Nothing to compute when both identifiers are the same.
if opts.acc1 == opts.acc2:
    sys.exit("The same codes!")

# Look both identifiers up in the "TAXID" node index (key "id").
TAXID = db.get_or_create_index(neo4j.Node, "TAXID")
node1list = TAXID.get("id", opts.acc1)
node2list = TAXID.get("id", opts.acc2)

pprint(node1list)
pprint(node2list)

# None marks "not found"; when several nodes match, the last one is used.
node1 = None
node2 = None

for match in node1list:
    node1 = match._id

for match in node2list:
    node2 = match._id

# Stop early on an unknown identifier instead of silently querying
# node id 0, which is what the previous default value led to.
if node1 is None:
    sys.exit("No node found for NCBI ID "+str(opts.acc1))
if node2 is None:
    sys.exit("No node found for NCBI ID "+str(opts.acc2))

print(node1)
print(node2)

nodecommon = crawler_distance_node( node1, node2 )

# crawler_distance_node returns 0 when it finds no meeting node.
if nodecommon == 0:
    print("No match!")
else:
    tmp = db.node(nodecommon)
    props = tmp.get_properties()
    print(props['id'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment