Skip to content

Instantly share code, notes, and snippets.

@technige
Last active August 29, 2015 14:09
Show Gist options
  • Save technige/c61c12a38ca216076974 to your computer and use it in GitHub Desktop.
Save technige/c61c12a38ca216076974 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from __future__ import print_function
import random
from time import time
from py2neo import Graph, GraphError
# Letter pools from which random names are assembled.
VOWELS = "aeiou"
CONSONANTS = "bcdfghjklmnprstvwz"

# Cypher statement that creates one Person node whose properties are taken
# from the map supplied as parameter A, and returns the new node.
CREATE_NODE = (
    "CREATE (a:Person)\n"
    "SET a = {A}\n"
    "RETURN a\n"
)

# Cypher statement that looks up two Person nodes by user_id (parameters A
# and B) and ensures a FOLLOWS relationship from the first to the second.
CREATE_UNIQUE_RELATIONSHIP = (
    "MATCH (a:Person), (b:Person)\n"
    "WHERE a.user_id = {A} AND b.user_id = {B}\n"
    "CREATE UNIQUE (a)-[ab:FOLLOWS]->(b)\n"
    "RETURN ab\n"
)
def random_name_sequence():
    """ Yield an endless stream of random two-word names.

    Every word starts with an upper-case consonant followed by one to four
    vowel+consonant pairs; the two words are separated by a single space.
    """

    def make_word():
        # Capitalised leading consonant, then 1-4 vowel/consonant pairs.
        letters = random.choice(CONSONANTS).upper()
        pair_count = random.randint(1, 4)
        for _ in range(pair_count):
            letters += random.choice(VOWELS)
            letters += random.choice(CONSONANTS)
        return letters

    while True:
        first = make_word()
        second = make_word()
        yield first + " " + second
class RandomGraphGenerator(object):
    """ Fill a graph with random Person nodes and FOLLOWS relationships.

    Nodes receive sequential ``user_id`` values starting at 1, a random
    two-word ``name`` and a random ``born`` year between 1900 and 1999.
    Relationships connect randomly chosen pairs of distinct user IDs that
    this generator has already handed out.
    """

    def __init__(self, graph):
        """ Store the graph and request a uniqueness constraint on
        ``Person.user_id``.

        :param graph: object exposing ``schema`` and ``cypher`` attributes
            (a py2neo ``Graph`` in this script)
        """
        self.graph = graph
        try:
            self.graph.schema.create_uniqueness_constraint("Person", "user_id")
        except GraphError:
            # Deliberate best effort: presumably the constraint already
            # exists from an earlier run -- TODO confirm.
            pass
        # Number of nodes created so far; also the highest user_id issued.
        self.count = 0
        self.names = random_name_sequence()

    def create_nodes(self, count):
        """ Create a number of nodes in a single Cypher transaction.

        :param count: how many Person nodes to create
        """
        tx = self.graph.cypher.begin()
        for _ in range(count):
            self.count += 1
            parameters = {
                "A": {
                    "user_id": self.count,
                    "name": next(self.names),
                    "born": random.randint(1900, 1999),
                }
            }
            tx.append(CREATE_NODE, parameters)
        tx.commit()

    def create_unique_relationships(self, count):
        """ Create a number of unique relationships in a single Cypher transaction.

        Each statement targets a random ordered pair of distinct user IDs in
        the range ``1..self.count``. The same pair may be drawn more than
        once; the statement uses ``CREATE UNIQUE`` for the relationship.

        :param count: how many relationship statements to issue
        :raises ValueError: if *count* is positive but fewer than two nodes
            have been created, since no pair of distinct IDs can be drawn
        """
        if count > 0 and self.count < 2:
            # Without this guard a single node makes the rejection loop
            # below spin forever (randint(1, 1) can never differ from the
            # start ID), and zero nodes fails inside randint only after a
            # transaction has already been opened.
            raise ValueError(
                "at least two nodes are required to create a relationship "
                "(have %d)" % self.count
            )
        tx = self.graph.cypher.begin()
        for _ in range(count):
            start_user_id = random.randint(1, self.count)
            end_user_id = start_user_id
            # Redraw until the end node differs from the start node.
            while end_user_id == start_user_id:
                end_user_id = random.randint(1, self.count)
            parameters = {
                "A": start_user_id,
                "B": end_user_id,
            }
            tx.append(CREATE_UNIQUE_RELATIONSHIP, parameters)
        tx.commit()
def main():
    """ Run the bulk import, printing how long each batch and the whole run take.

    Node batches and relationship batches of equal size are alternated until
    the configured total has been reached.
    """
    tx_size = 1000
    total = 120000
    graph = Graph()
    generator = RandomGraphGenerator(graph)
    print("Creating %d nodes and merging %d relationships in "
          "batches of %d" % (total, total, tx_size))
    run_started = time()
    batch_count = total // tx_size
    for _ in range(batch_count):
        # Create nodes
        step_started = time()
        generator.create_nodes(tx_size)
        elapsed = time() - step_started
        print("Created %d nodes in %f seconds" % (tx_size, elapsed))
        # Create relationships
        step_started = time()
        generator.create_unique_relationships(tx_size)
        elapsed = time() - step_started
        print("Merged %d relationships in %f seconds" % (tx_size, elapsed))
    run_elapsed = time() - run_started
    print("Entire bulk import took %f seconds" % run_elapsed)
if __name__ == "__main__":
    # Intended to be run against a fresh database. Afterwards, explore the
    # generated data in the browser with a query such as
    # `MATCH (p:Person {user_id:1}) RETURN p`
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment