Skip to content

Instantly share code, notes, and snippets.

@JnBrymn
Created March 21, 2014 12:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JnBrymn/9685567 to your computer and use it in GitHub Desktop.
Save JnBrymn/9685567 to your computer and use it in GitHub Desktop.
This tests 3 different algorithms for inserting a user with their friends.
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import datetime\n",
"import random\n",
"from py2neo import neo4j\n",
"graph = neo4j.GraphDatabaseService(\"http://localhost:7474/db/data\")\n",
"\n",
"try:\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" CREATE CONSTRAINT ON (u:User).\n",
" ASSERT u.id IS UNIQUE\n",
" \"\"\").execute()\n",
"except:\n",
" pass\n",
"\n",
"def delete_everything():\n",
" neo4j.CypherQuery(graph,\"MATCH ()-[r]-() DELETE r\").run()\n",
" neo4j.CypherQuery(graph,\"MATCH (n) DELETE n\").run()\n",
" \n",
"def count_relationships():\n",
" return neo4j.CypherQuery(graph,\"MATCH ()-[r]-() RETURN COUNT(r)\").execute_one()\n",
"\n",
"def count_nodes():\n",
" return neo4j.CypherQuery(graph,\"MATCH (n) RETURN COUNT(n)\").execute_one()\n",
"\n",
" \n",
"def rand_id():\n",
" \"\"\"gets a random id\n",
" \n",
" It's equally likely to get an id from 0-5000 as it \n",
" is to get an id from 5000-inf.\n",
" \"\"\"\n",
" return int(random.expovariate(1.0/5000))\n",
"\n",
"def try_algo(insert_friends_algo):\n",
" \"\"\"uses algorithm to insert random relationship\n",
" \n",
" For 100 iterations this inserts a user and their 200 friends.\n",
" User and friend ids are selected at random.\n",
" Prints time required to insert.\n",
" \n",
" All relationships and nodes are removed after test completes.\n",
" \"\"\"\n",
" start_time = datetime.datetime.now()\n",
" for i in range(100):\n",
" id = rand_id()\n",
" num_friends = 200\n",
" ids = []\n",
" for j in range(num_friends):\n",
" ids.append(rand_id())\n",
" insert_friends_algo(id,ids)\n",
" time_required = (datetime.datetime.now() - start_time).total_seconds()\n",
" rel_count = count_relationships()\n",
" node_count = count_nodes()\n",
" delete_everything()\n",
" print \"time:%fs;\\trelationship count:%d;\\tnode count%d; rel/sec:%f;\\tnode/sec:%f\" % (time_required,rel_count,node_count,rel_count/time_required,node_count/time_required)\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 309
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def insert_friends1(id,ids): \n",
" \"\"\"this was my original query\"\"\"\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" MERGE (a:User {id:{id}})\n",
" ON CREATE SET\n",
" a.created_at = timestamp()\n",
" SET a.friends_found_at = timestamp()\n",
" FOREACH (id IN {ids} |\n",
" MERGE (b:User {id:id})\n",
" ON CREATE SET\n",
" b.created_at = timestamp()\n",
" CREATE UNIQUE (a)-[:FOLLOWS]->(b)\n",
" )\n",
" \"\"\").execute(id=id,ids=ids)\n",
"\n",
"def insert_friends2(id,ids):\n",
" \"\"\"interestingly this one doesn't work\n",
" \n",
" I was trying to seperate out user creating and relationship connection\n",
" MERGE is allowed within a FOREACH, but MATCH isn't\n",
" \"\"\"\n",
" all_ids = ids\n",
" all_ids.append(id)\n",
" for u in all_ids :\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" MERGE (u:User {id:{id}})\n",
" ON CREATE SET\n",
" u.created_at = timestamp()\n",
" \"\"\").execute(id=u)\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" MATCH (a:User {id:{id}})\n",
" SET a.friends_found_at = timestamp()\n",
" FOREACH (id IN {ids} | \n",
" MATCH (b:User {id:id})\n",
" CREATE UNIQUE (a)-[:FOLLOWS]->(b)\n",
" )\"\"\").execute(id=id,ids=ids)\n",
" \n",
"def insert_friends3(id,ids):\n",
" \"\"\"here I create users one at a time in a loop\n",
" and then connect them one at a time in loop\n",
" \"\"\"\n",
" all_ids = ids\n",
" all_ids.append(id)\n",
" for u in all_ids :\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" MERGE (u:User {id:{id}})\n",
" ON CREATE SET\n",
" u.created_at = timestamp()\n",
" \"\"\").execute(id=u)\n",
" for friend_id in ids :\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" MATCH (a:User {id:{id}}),(b:User {id:{friend_id}})\n",
" SET a.friends_found_at = timestamp()\n",
" CREATE UNIQUE (a)-[:FOLLOWS]->(b)\n",
" \"\"\").execute(id=id,friend_id=friend_id)\n",
" \n",
"def insert_friends4(id,ids):\n",
" \"\"\"here I create users one at a time but connect them all at once\"\"\"\n",
" all_ids = ids\n",
" all_ids.append(id)\n",
" for u in all_ids :\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" MERGE (u:User {id:{id}})\n",
" ON CREATE SET\n",
" u.created_at = timestamp()\"\"\").execute(id=u)\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" MATCH (a:User {id:{id}}),(b:User)\n",
" WHERE b.id in {ids}\n",
" SET a.friends_found_at = timestamp()\n",
" CREATE UNIQUE (a)-[:FOLLOWS]->(b)\"\"\").execute(id=id,ids=ids)\n",
" \n",
"def insert_friends5(id,ids):\n",
" \"\"\"here I tried to create all users in one query and connect all users in one query\n",
" \n",
" however, the user creation query is invalid (again MATCH vs MERGE)\n",
" \"\"\"\n",
" all_ids = ids\n",
" all_ids.append(id)\n",
" for u in all_ids :\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" MERGE (u:User)\n",
" WHERE u.id IN ids\n",
" ON CREATE SET\n",
" u.created_at = timestamp()\"\"\").execute(ids=all_ids)\n",
" neo4j.CypherQuery(graph,\"\"\"\n",
" MATCH (a:User {id:{id}}),(b:User)\n",
" WHERE b.id in {ids}\n",
" SET a.friends_found_at = timestamp()\n",
" CREATE UNIQUE (a)-[:FOLLOWS]->(b)\"\"\").execute(id=id,ids=ids)\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 310
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try_algo(insert_friends1)\n",
"try_algo(insert_friends3)\n",
"try_algo(insert_friends4)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"time:248.569471s;\trelationship count:51064;\tnode count11155; rel/sec:205.431503;\tnode/sec:44.876790\n",
"time:4184.942574s;\trelationship count:39701;\tnode count9917; rel/sec:9.486630;\tnode/sec:2.369686"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"time:25518.941203s;\trelationship count:39639;\tnode count9803; rel/sec:1.553317;\tnode/sec:0.384146"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 311
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment