anonymous / py2neo_chunked_batch_submit.py
py2neo helper class that submits a list of dicts via batch.submit() in chunks, to avoid memory problems with very large batches. The input list looks like: [{key1: value, key2: value}, {key1: value, key2: value}, ...]
from py2neo import neo4j


class ChunkedIndexedBatchCreate(object):
    """
    Creating large numbers of indexed nodes in a single batch is buggy or can fail
    due to memory issues. This class is constructed with: a py2neo batch, a py2neo
    index, the index key, the name of the entity field to use as the index value,
    the entity list, and the chunk size.
    """
    def __init__(self, batch, index, key, value, list, size):
        self.batch = batch
        self.index = index
        self.key = key
        self.value = value
        self.list = list
        self.size = size
        print 'chunked batch submit started'

    def chunker(self, seq, size):
        """
        Take a list and return a generator yielding chunks of the given size.
        """
        return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))

    def submit(self):
        """
        Chunk the entity list and submit one batch per chunk.
        """
        # split the list into chunks
        print '\tnumber of elements: ' + str(len(self.list))
        chunks = self.chunker(self.list, self.size)
        # counter for progress output
        i = 1
        for chunk in chunks:
            print '\t' + str(i)
            i += 1
            for element in chunk:
                self.batch.create_indexed_node_or_fail(self.index, self.key, element[self.value], element)
            # submit the batch for this chunk
            # NOTE: depending on the py2neo version, the batch may need to be
            # cleared here before it is reused for the next chunk
            self.batch.submit()
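
For reference, here is a minimal usage sketch. It assumes the py2neo 1.x API (neo4j.GraphDatabaseService, get_or_create_index, WriteBatch); the server URL, index name, property names and entity data below are made up for illustration.

from py2neo import neo4j

# connect to a local Neo4j server (URL is an assumption)
graph = neo4j.GraphDatabaseService("http://localhost:7474/db/data/")

# a node index and a write batch to hand to the helper class
index = graph.get_or_create_index(neo4j.Node, "People")
batch = neo4j.WriteBatch(graph)

# hypothetical entity list in the expected shape
entities = [
    {'name': 'Alice', 'age': 30},
    {'name': 'Bob', 'age': 25},
]

# index each node on its 'name' property, submitting in chunks of 1000
creator = ChunkedIndexedBatchCreate(batch, index, 'name', 'name', entities, 1000)
creator.submit()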