public
Created

A py2neo helper class that calls batch.submit() on a list of dicts in 'chunks' to avoid memory problems. The input list looks like: [{key1: value, key2: value}, {key1: value, key2: value}, ...]

  • Download Gist
py2neo_chunked_batch_submit.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
from py2neo import neo4j
 
class ChunkedIndexedBatchCreate(object):
    """Create indexed nodes through a batch, submitting it chunk by chunk.

    'Create' of large numbers of nodes is buggy or can fail due to memory
    issues, so the entity list is split into chunks of a fixed size and the
    batch is submitted once per chunk instead of once for the whole list.

    Args:
        batch: py2neo batch object; it must provide
            ``create_indexed_node_or_fail`` and ``submit``.
        index: py2neo index the nodes are registered in.
        key: index key under which every node is stored.
        value: name of the entity field whose content is used as the index
            value (read as ``element[value]``).
        list: entity list, e.g. ``[{key1: value, key2: value}, ...]``.
        size: maximum number of entities per submitted chunk; must be a
            positive integer.
    """

    def __init__(self, batch, index, key, value, list, size):
        # The parameter name ``list`` shadows the builtin; it is kept because
        # callers may pass it by keyword.
        self.batch = batch
        self.index = index
        self.key = key
        self.value = value
        self.list = list
        self.size = size
        print('chunked batch submit started')

    def chunker(self, seq, size):
        """Return a generator of consecutive slices of *seq*.

        Every slice holds at most *size* items; only the last one may be
        shorter. An empty *seq* yields no slices.

        Raises:
            ValueError: if *size* is zero or negative. Without this check a
                negative size would produce no chunks at all and the caller
                would silently submit nothing.
        """
        if size <= 0:
            raise ValueError(
                'chunk size must be a positive integer, got %r' % (size,))
        # ``range`` (not ``xrange``) keeps this runnable on Python 2 and 3;
        # it only enumerates the chunk start offsets.
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))

    def submit(self):
        """Chunk the entity list and submit the batch once per chunk.

        Progress (the number of elements, then the 1-based number of each
        chunk) is printed to stdout.

        Raises:
            ValueError: if ``self.size`` is zero or negative.
        """
        print('\tnumber of elements: ' + str(len(self.list)))
        chunks = self.chunker(self.list, self.size)

        for number, chunk in enumerate(chunks, 1):
            print('\t' + str(number))
            for element in chunk:
                self.batch.create_indexed_node_or_fail(
                    self.index, self.key, element[self.value], element)
            # Submit the batch for each chunk.
            # NOTE(review): assumes submit() leaves the batch empty for the
            # next chunk - confirm against the py2neo version in use.
            self.batch.submit()

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.