Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
queryset_iterator, a function to iterate over huge Django querysets without using too much memory
import gc
from itertools import islice
def queryset_iterator(qs, batchsize=500, gc_collect=True):
    """Yield the objects of *qs* in primary-key order, one batch at a time.

    The primary keys are streamed from a ``values_list('pk')`` query and
    consumed ``batchsize`` at a time; each batch of keys is then fetched with
    a separate ``pk__in`` query, so only one batch of full objects is
    requested per query.

    Args:
        qs: Queryset-like object supporting ``values_list``, ``order_by``,
            ``distinct``, ``filter`` and ``iterator``.
        batchsize: Maximum number of primary keys per ``pk__in`` query.
            Must be at least 1.
        gc_collect: When true, run ``gc.collect()`` after each batch has
            been yielded.

    Yields:
        The objects returned by ``qs.filter(pk__in=...)``, ordered by ``pk``.

    Raises:
        ValueError: If ``batchsize`` is smaller than 1. Because this is a
            generator, the error surfaces on the first iteration step, not
            at call time.
    """
    # A non-positive batch size would never consume a key and never finish.
    if batchsize < 1:
        raise ValueError("batchsize must be a positive integer")

    pk_iter = qs.values_list('pk', flat=True).order_by('pk').distinct().iterator()
    while True:
        # islice stops quietly when the key stream is exhausted, so no
        # StopIteration handling or Python-2-only ``.next()`` call is needed.
        pk_batch = list(islice(pk_iter, batchsize))
        if not pk_batch:
            # Nothing left: skip the pointless ``pk__in=[]`` query.
            break
        for obj in qs.filter(pk__in=pk_batch).order_by('pk').iterator():
            yield obj
        if gc_collect:
            gc.collect()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.