Create a gist now

Instantly share code, notes, and snippets.

py2neo helper class that performs batch.submit() on a list of dicts in 'chunks' to avoid memory problems. The input list looks like: [{key1: value, key2: value}, {key1: value, key2: value}, ...]
from py2neo import neo4j
class ChunkedIndexedBatchCreate(object):
    """Submit a large list of node-property dicts to a py2neo batch in chunks.

    'Create' of large numbers of nodes is buggy or can fail due to memory
    issues, so the entity list is split into fixed-size chunks and one
    batch.submit() is issued per chunk.

    Constructor arguments: a py2neo batch, a py2neo index, the index key,
    the name of the entity-dict field whose value is used as the index
    value, the entity list ([{key1: value, key2: value}, ...]) and the
    chunk size.
    """

    def __init__(self, batch, index, key, value, list, size):
        # NOTE: the parameter name 'list' shadows the builtin; it is kept
        # unchanged for backward compatibility with existing callers.
        self.batch = batch  # py2neo batch the operations are appended to
        self.index = index  # py2neo index the created nodes are added to
        self.key = key      # index key under which nodes are indexed
        self.value = value  # entity-dict field whose value is the index value
        self.list = list    # entities: a list of property dicts
        self.size = size    # number of entities submitted per batch
        print('chunked batch submit started')

    def chunker(self, seq, size):
        """Return a generator of consecutive slices of *seq* of length *size*.

        The final chunk may be shorter when len(seq) is not a multiple of
        *size*.  Does not use any instance state.
        """
        # range (not xrange) keeps this working on both Python 2 and 3;
        # inside the generator expression the behavior is identical.
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))

    def submit(self):
        """Split self.list into chunks and submit one batch per chunk."""
        print('\tnumber of elements: ' + str(len(self.list)))
        chunks = self.chunker(self.list, self.size)
        # index to count chunks, for progress output only
        i = 1
        for chunk in chunks:
            print('\t' + str(i))
            i += 1
            for element in chunk:
                # NOTE(review): the original gist is truncated at this point;
                # the call below is reconstructed from the class docstring
                # using the py2neo 1.x WriteBatch API (node created with the
                # element's properties and added to the index under
                # key=self.key, value=element[self.value]) -- confirm against
                # the original source.
                self.batch.get_or_create_indexed_node(
                    self.index, self.key, element[self.value], element)
            # submit batch for each chunk, then clear it so the next chunk
            # starts from an empty batch
            self.batch.submit()
            self.batch.clear()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment