Last active
August 29, 2015 13:56
-
-
Save ngopal/9164149 to your computer and use it in GitHub Desktop.
A quick Python script that takes ConsensusPathDB data and loads it into a running Neo4j instance.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This program takes the PPI data from ConsensusPathDB and populates the Neo4j database with it
# By Nikhil Gopal
#
# To run: python populate_running_db.py ConsensusDB_Human_PPI nodes_list.txt
# The code to generate the nodes_list.txt file exists here: https://gist.github.com/ngopal/9164294
import os, sys | |
from neo4jrestclient.client import GraphDatabase | |
from itertools import chain, combinations | |
# Placeholder stored in the gene -> node dictionary until the Neo4j node for
# that gene has actually been created.
UNCREATED = "null"


def read_node_names(lines):
    """Build the gene-name -> node dictionary from the lines of the nodes list.

    Every gene name is mapped to the ``UNCREATED`` placeholder. Lines that
    contain 'Label' (the header) and blank lines are skipped. Both '\\n' and
    '\\r\\n' line endings are removed so the keys match the gene names parsed
    from the PPI file.
    """
    node_objects = {}
    for raw in lines:
        name = raw.strip('\r\n')
        if not name or 'Label' in name:
            continue
        node_objects[name] = UNCREATED
    return node_objects


def parse_ppi_line(raw):
    """Split one line of the PPI file into its gene names and its score.

    Returns ``(genes, score)`` where ``genes`` is the list of names from the
    third tab-separated column with the '_HUMAN' suffix removed, and ``score``
    is the fourth column, kept as a string. Returns ``None`` for blank lines
    and for lines containing '#', which are skipped as header/comment lines.

    Raises ValueError if a data line has fewer than four columns.
    """
    if '#' in raw or not raw.strip():
        return None
    fields = raw.strip('\r\n').split('\t')
    if len(fields) < 4:
        raise ValueError(
            "expected at least 4 tab-separated columns, got %d: %r"
            % (len(fields), raw)
        )
    genes = fields[2].replace('_HUMAN', '').split(',')
    return genes, fields[3]


def get_or_create_node(gdb, node_objects, gene):
    """Return the node for *gene*, creating it in the database on first use.

    A gene that is missing from ``node_objects`` is handled like one that is
    still mapped to the placeholder, so it gets a node instead of raising
    KeyError.
    """
    node = node_objects.get(gene, UNCREATED)
    # The placeholder is the only string ever stored in the dictionary;
    # anything else is an already-created node object.
    if isinstance(node, str):
        # node object has a property 'name' set to the gene name
        node = gdb.nodes.create(name=gene)
        node_objects[gene] = node
    return node


def populate(gdb, node_lines, ppi_lines):
    """Create gene nodes and 'interacts' relationships from the PPI data.

    ``gdb`` is the graph database client, ``node_lines`` iterates over the
    lines of the nodes list and ``ppi_lines`` over the lines of the PPI file.
    Returns the dictionary mapping gene names to node objects (or to the
    placeholder for genes that never took part in a pair).
    """
    node_objects = read_node_names(node_lines)
    for raw in ppi_lines:
        parsed = parse_ppi_line(raw)
        if parsed is None:
            continue
        genes, score = parsed
        # create every pairwise combination of the genes on this line,
        # i.e. [gene1, gene2, gene3] becomes
        # (gene1, gene2), (gene1, gene3), (gene2, gene3),
        # and each pair is associated with the line's score
        for gene1, gene2 in combinations(genes, 2):
            first = get_or_create_node(gdb, node_objects, gene1)
            second = get_or_create_node(gdb, node_objects, gene2)
            # node object has a relationship 'interacts' with another node
            # object, and the relationship has a property 'confidence' set
            # to the value of score
            first.relationships.create("interacts", second, confidence=score)
    return node_objects


if __name__ == "__main__":
    # Connect to graph database
    gdb = GraphDatabase("http://localhost:7474/db/data/")
    # sys.argv[1] is the PPI file, sys.argv[2] is the nodes list; the
    # with-statement closes both files even if population fails part-way.
    with open(sys.argv[2], 'r') as node_file, open(sys.argv[1], 'r') as ppi_file:
        node_objects = populate(gdb, node_file, ppi_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment