Skip to content

Instantly share code, notes, and snippets.

@un1t
Created July 4, 2016 17:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save un1t/536f4391b6091097aae5e40b57e61f68 to your computer and use it in GitHub Desktop.
Save un1t/536f4391b6091097aae5e40b57e61f68 to your computer and use it in GitHub Desktop.
import gc
from more_itertools import chunked
from collections import defaultdict
def fetch_fk(fk_field, fields, objects):
    """Attach the target of a foreign key to each row dict, using one query.

    Args:
        fk_field: Object exposing ``.field.column``, ``.field.name`` and
            ``.get_queryset()`` (presumably a Django forward foreign-key
            descriptor such as ``Book.author`` -- confirm against callers).
        fields: Names passed to ``.values()`` for the related rows. Must
            contain ``'id'`` because related rows are indexed by it.
        objects: List of dicts, each holding the foreign-key value under
            ``fk_field.field.column``. The dicts are modified in place.

    Returns:
        None. Every dict in ``objects`` gains a ``fk_field.field.name`` key
        holding the related row dict, or None when the foreign key is null
        or no related row was found.

    Raises:
        AssertionError: If ``'id'`` is missing from ``fields``.
    """
    assert 'id' in fields
    if not objects:
        return

    column = fk_field.field.column
    # Null foreign keys have nothing to look up, and a set keeps each id
    # from being sent to the database more than once.
    related_ids = {obj[column] for obj in objects if obj[column] is not None}

    related_by_id = {}
    if related_ids:
        rows = (
            fk_field.get_queryset()
            .filter(id__in=list(related_ids))
            .values(*fields)
        )
        related_by_id = {row['id']: row for row in rows}

    name = fk_field.field.name
    for obj in objects:
        obj[name] = related_by_id.get(obj[column])
def fetch_m2m(m2m_field, fields, objects):
    """Attach many-to-many related rows to each row dict, using two queries.

    One query reads the link rows from the through model, a second reads
    the related rows; the results are then joined in Python.

    Args:
        m2m_field: Object exposing ``.through`` and ``.field`` (with
            ``name``, ``related_model``, ``m2m_column_name()`` and
            ``m2m_reverse_name()``) -- presumably a Django many-to-many
            descriptor such as ``Book.tags``; confirm against callers.
        fields: Names passed to ``.values()`` for the related rows. Must
            contain ``'id'`` because related rows are indexed by it.
        objects: List of dicts, each with an ``'id'`` key. The dicts are
            modified in place.

    Returns:
        None, for empty and non-empty input alike. Every dict in
        ``objects`` gains a ``m2m_field.field.name`` key holding a list of
        related row dicts (empty when there are no links).

    Raises:
        AssertionError: If ``'id'`` is missing from ``fields``.
    """
    assert 'id' in fields
    if not objects:
        return

    field = m2m_field.field
    source_column = field.m2m_column_name()
    target_column = field.m2m_reverse_name()

    object_ids = [obj['id'] for obj in objects]
    # Only the two link columns are read below, so only those are selected.
    links = m2m_field.through.objects.all().filter(
        **{source_column + '__in': object_ids}
    ).values(source_column, target_column)

    targets_by_source = defaultdict(list)
    target_ids = set()
    for link in links:
        targets_by_source[link[source_column]].append(link[target_column])
        target_ids.add(link[target_column])

    related_by_id = {}
    if target_ids:
        rows = field.related_model.objects.filter(
            id__in=list(target_ids)
        ).values(*fields)
        related_by_id = {row['id']: row for row in rows}

    name = field.name
    for obj in objects:
        # A link may point at a row the related manager does not return;
        # such links are skipped instead of raising KeyError.
        obj[name] = [
            related_by_id[target_id]
            for target_id in targets_by_source.get(obj['id'], ())
            if target_id in related_by_id
        ]
def fast_model_to_dict(instance, fields=None, exclude=None):
    """Copy selected attributes of ``instance`` into a plain dict.

    Args:
        instance: Object whose attributes are read with ``getattr``.
        fields: Attribute names to copy. When None, the names come from
            ``get_field_names(instance)``. The list is never modified.
        exclude: Optional iterable of names to leave out. Names that are
            not in ``fields`` are ignored.

    Returns:
        Dict mapping each selected name to the attribute value.

    Raises:
        AttributeError: If a selected name is not an attribute of
            ``instance``.
    """
    # TODO: filefield
    if fields is None:
        fields = get_field_names(instance)
    # Filter instead of calling fields.remove(): that mutated the caller's
    # list and raised ValueError for names that were not present.
    excluded = set(exclude) if exclude else ()
    return {
        name: getattr(instance, name)
        for name in fields
        if name not in excluded
    }
def get_field_names(obj):
    """Return the names of the non-relational fields in ``obj._meta.fields``.

    A field counts as relational when its ``remote_field`` attribute (or
    the older ``rel`` attribute, when ``remote_field`` is absent) is truthy.
    Django renamed ``Field.rel`` to ``Field.remote_field`` in 1.9 and
    removed the old name in 2.0, so both spellings are checked.

    Args:
        obj: Object exposing ``_meta.fields``, an iterable of field objects
            that each have a ``name``.

    Returns:
        List of field names, in ``_meta.fields`` order.
    """
    def is_relation(field):
        remote = getattr(field, 'remote_field', None)
        if remote is None:
            # Fallback for Django versions that only define ``rel``.
            remote = getattr(field, 'rel', None)
        return bool(remote)

    return [field.name for field in obj._meta.fields if not is_relation(field)]
def queryset_iterator(queryset, chunksize=1000):
    """Iterate over a Django queryset in primary-key order, one page at a time.

    At most ``chunksize`` (default: 1000) rows are fetched per query, so
    only one page is held in memory at a time, whereas iterating the
    queryset directly would load every row. ``gc.collect()`` runs after
    each page.

    Note that any ordering already applied to the queryset is replaced:
    rows are always yielded in ascending ``pk`` order.

    Args:
        queryset: Queryset to iterate; it must support ``order_by``,
            ``filter(pk__gt=...)``, indexing and slicing.
        chunksize: Maximum number of rows fetched per query.

    Yields:
        The objects of ``queryset`` in ascending ``pk`` order, up to the
        largest primary key present when iteration started (the page that
        reaches that key is yielded in full).
    """
    try:
        last_pk = queryset.order_by('-pk')[0].pk
    except IndexError:
        # Empty queryset: nothing to yield.
        return

    queryset = queryset.order_by('pk')
    # Start with no lower bound rather than 0, so rows with zero or negative
    # keys are included and non-integer keys are never compared with an int.
    pk = None
    while pk is None or pk < last_pk:
        page = queryset if pk is None else queryset.filter(pk__gt=pk)
        fetched = False
        for obj in page[:chunksize]:
            fetched = True
            pk = obj.pk
            yield obj
        if not fetched:
            # An empty page means no progress is possible (rows were
            # deleted meanwhile, or chunksize is not positive); stop
            # instead of looping forever.
            break
        gc.collect()
def chunked_queryset(queryset, chunksize=1000):
    """Yield the rows of ``queryset`` in lists of up to ``chunksize`` items.

    Rows are produced by ``queryset_iterator`` with the same ``chunksize``
    and grouped with ``more_itertools.chunked``.
    """
    rows = queryset_iterator(queryset, chunksize)
    batches = chunked(rows, chunksize)
    for batch in batches:
        yield batch
def dictfetchall(cursor):
    """Return all remaining rows from a cursor as a list of dicts.

    Each dict maps column name (the first item of every entry in
    ``cursor.description``) to the value in that row.

    Args:
        cursor: DB-API style cursor exposing ``description`` and
            ``fetchall()``.

    Returns:
        List with one dict per row. Empty when the cursor has no result
        set, i.e. ``cursor.description`` is None.
    """
    description = cursor.description
    if description is None:
        # The last statement produced no result set (e.g. DDL or UPDATE),
        # so there are no column names to zip against.
        return []
    columns = [col[0] for col in description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment