Skip to content

Instantly share code, notes, and snippets.

Created August 21, 2013 12:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save anonymous/6293739 to your computer and use it in GitHub Desktop.
Save anonymous/6293739 to your computer and use it in GitHub Desktop.
py2neo helper class that submits a list of dicts via batch.submit() in 'chunks' to avoid memory problems. The input 'list' looks like: [{key1: value, key2: value}, {key1: value, key2: value}, ...]
from py2neo import neo4j
class ChunkedIndexedBatchCreate(object):
    """Queue indexed node creations on a batch and submit them chunk by chunk.

    'Create' of large numbers of nodes is buggy or can fail due to memory
    issues, so the entity list is split into chunks of ``size`` elements and
    the batch is submitted once per chunk.

    Args:
        batch: batch object; it must provide ``create_indexed_node_or_fail``
            and ``submit`` (presumably a py2neo ``WriteBatch`` -- confirm
            against the caller).
        index: index handed through to ``create_indexed_node_or_fail``.
        key: index key under which every entity is indexed.
        value: name of the entity field whose content is used as the index
            value (looked up as ``element[value]``).
        list: entities to create, e.g. ``[{key1: value, key2: value}, ...]``.
        size: number of entities per submitted chunk.
    """

    def __init__(self, batch, index, key, value, list, size):
        # ``list`` shadows the builtin, but the parameter name is part of the
        # public signature and is kept so keyword-argument callers still work.
        self.batch = batch
        self.index = index
        self.key = key
        self.value = value
        self.list = list
        self.size = size
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('chunked batch submit started')

    def chunker(self, seq, size):
        """Return a generator of consecutive slices of ``seq``.

        Every slice holds ``size`` items, except the last one, which may be
        shorter. An empty ``seq`` produces no slices.

        Raises:
            ValueError: if ``size`` is smaller than 1. A negative step would
                otherwise produce no chunks at all and silently drop every
                element.
        """
        if size < 1:
            raise ValueError('chunk size must be a positive integer')
        # range() instead of the Python 2-only xrange(); only one integer per
        # chunk is produced, so the extra list on Python 2 is negligible.
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))

    def submit(self):
        """Chunk the entity list and submit the batch once per chunk.

        Prints the total number of elements, then the 1-based number of each
        chunk as it is processed. Results of ``batch.submit()`` are discarded.
        """
        print('\tnumber of elements: ' + str(len(self.list)))
        chunks = self.chunker(self.list, self.size)
        for chunk_number, chunk in enumerate(chunks, 1):
            print('\t' + str(chunk_number))
            for element in chunk:
                self.batch.create_indexed_node_or_fail(
                    self.index, self.key, element[self.value], element)
            # Submit the batch for each chunk.
            # NOTE(review): this relies on submit() not re-sending requests
            # queued for earlier chunks -- confirm for the py2neo version in
            # use; if requests are retained, a clear() is needed here.
            self.batch.submit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment