Created
August 21, 2013 12:16
-
-
Save anonymous/6293739 to your computer and use it in GitHub Desktop.
A py2neo helper class that submits a list of dicts via batch.submit() in fixed-size 'chunks' to avoid memory problems. The input 'list' looks like:
[{key1:value, key2:value}, {key1:value,key2:value} , ...]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from py2neo import neo4j | |
class ChunkedIndexedBatchCreate(object):
    """Submit indexed node creations to a py2neo batch in fixed-size chunks.

    'Create' of large numbers of nodes is buggy or can fail due to memory
    issues, so the entity list is split into chunks and ``batch.submit()``
    is called once per chunk instead of once for the whole list.

    Args:
        batch: py2neo batch object; it must provide
            ``create_indexed_node_or_fail`` and ``submit``.
        index: py2neo index handed through to the batch unchanged.
        key: index key under which every entity is registered.
        value: name of the field of each entity whose content is used as
            the index value (i.e. ``entity[value]``).
        list: the entities, e.g. ``[{key1: value, key2: value}, ...]``.
            Must support ``len()`` and slicing. The name shadows the
            builtin but is kept so existing keyword callers keep working.
        size: maximum number of entities per submitted chunk (>= 1).
    """

    def __init__(self, batch, index, key, value, list, size):
        self.batch = batch
        self.index = index
        self.key = key
        self.value = value
        self.list = list
        self.size = size
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('chunked batch submit started')

    def chunker(self, seq, size):
        """Return a generator of consecutive slices of ``seq``.

        Every slice holds ``size`` items except possibly the last one,
        which holds the remainder. An empty ``seq`` produces no slices.

        Raises:
            ValueError: if ``size`` is smaller than 1. A zero step already
                failed this way; a negative step used to yield nothing,
                which silently skipped the whole submit.
        """
        if size < 1:
            raise ValueError('chunk size must be a positive integer')
        # range (not xrange) keeps this usable on both Python 2 and 3.
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))

    def submit(self):
        """Queue and submit all entities, one ``batch.submit()`` per chunk.

        For every entity ``create_indexed_node_or_fail`` is called with the
        index, the key, ``entity[self.value]`` and the entity itself.
        Progress (total element count, then the 1-based chunk number) is
        printed to stdout. Nothing is returned.
        """
        print('\tnumber of elements: ' + str(len(self.list)))
        chunks = self.chunker(self.list, self.size)
        for number, chunk in enumerate(chunks, 1):
            print('\t' + str(number))
            for element in chunk:
                self.batch.create_indexed_node_or_fail(
                    self.index, self.key, element[self.value], element)
            # NOTE(review): assumes batch.submit() empties the queued
            # requests; otherwise earlier chunks would be sent again --
            # confirm against the py2neo version in use.
            self.batch.submit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment