Instantly share code, notes, and snippets.

Embed
What would you like to do?
"""
Django ORM Optimization Tips
Caveats:
* Only use optimizations that obfuscate the code if you need to.
* Not all of these tips are hard and fast rules.
* Use your judgement to determine what improvements are appropriate for your code.
"""
# ---------------------------------------------------------------------------
# 1. Profile
# ---------------------------------------------------------------------------
## Use these tools:
## * django-debug-toolbar
## * QuerySet.explain()
# ---------------------------------------------------------------------------
# 2. Be aware of QuerySet's lazy evaluation.
# ---------------------------------------------------------------------------
## 2a. When QuerySets are evaluated
# Iteration
for person in Person.objects.all():
# Some logic
# Slicing/Indexing
Person.objects.all()[0]
# Pickling (i.e. serialization)
pickle.dumps(Person.objects.all())
# Evaluation functions
repr(Person.objects.all())
len(Person.objects.all())
list(Person.objects.all())
bool(Person.objects.all())
# Other
[person for person in Person.objects.all()] # List comprehensions
person in Person.objects.all() # `in` checks
## 2b. When QuerySets are cached/not cached
### Not Cached
# Not reusing evaluated QuerySets
print([p.name for p in Person.objects.all()]) # QuerySet evaluated and cached
print([p.name for p in Person.objects.all()]) # New QuerySet is evaluated and cached
# Slicing/indexing unevaluated QuerySets
queryset = Person.objects.all()
print(queryset[0]) # Queries the database
print(queryset[0]) # Queries the database again
# Printing
print(Person.objects.all())
### Cached
# Reusing an evaluated QuerySet
queryset = Person.objects.all()
print([p.name for p in queryset]) # QuerySet evaluated and cached
print([p.name for p in queryset]) # Cached results are used
# Slicing/indexing evaluated QuerySets
queryset = Person.objects.all()
list(queryset) # Queryset evaluated and cached
print(queryset[0]) # Cache used
print(queryset[0]) # Cache used
# ---------------------------------------------------------------------------
# 3. Be aware of which attributes are not cached.
# ---------------------------------------------------------------------------
## Not initially retrieved/cached
# Foreign-key related objects
person = Person.objects.get(id=1)
person.father # foreign object is retrieved and cached
person.father # cached version is used
## Never cached
# Callable attributes
person = Person.objects.get(id=1)
person.children.all() # Database hit
person.children.all() # Another database hit
# ---------------------------------------------------------------------------
# 4. Use select_related() and prefetch_related() when you will need everything.
# ---------------------------------------------------------------------------
# DON'T
queryset = Person.objects.all()
for person in queryset:
person.father # Foreign key relationship results in a database hit each iteration
# DO
queryset = Person.objects.all().select_related('father') # Foreign key object is included in query and cached
for person in queryset:
person.father # Hits the cache instead of the database
# ---------------------------------------------------------------------------
# 5. Try to avoid database queries in a loop.
# ---------------------------------------------------------------------------
# DON'T (contrived example)
filtered = Person.objects.filter(first_name='Shallan', last_name='Davar')
for age in range(18):
person = filtered.get(age=age) # Database query on each iteration
# DO (contrived example)
filtered = Person.objects.filter( # Narrow down the QuerySet to only what you need
first_name='Shallan',
last_name='Davar',
age_gte=0,
age_lte=18,
)
lookup = {person.age: person for person in filtered} # Evaluate the QuerySet and construct lookup
for age in range(18):
person = lookup[age] # No database query
# ---------------------------------------------------------------------------
# 6. Use iterator() to iterate through a very large QuerySet only once.
# ---------------------------------------------------------------------------
# Save memory by not caching anything
for person in Person.objects.iterator():
# Some logic
# ---------------------------------------------------------------------------
# 7. Do work in the database rather than in Python.
# ---------------------------------------------------------------------------
## 7a. Use filter() and exclude()
# DON'T
for person in Person.objects.all():
if person.age >= 18:
# Do something
# DO
for person in Person.objects.filter(age__gte=18):
# Do something
## 7b. Use F expressions
# DON'T
for person in Person.objects.all():
person.age += 1
person.save()
# DO
Person.objects.update(age=F('age') + 1)
## 7c. Do aggregation in the database, if possible
# DON'T
max_age = 0
for person in Person.objects.all():
if person.age > max_age:
max_age = person.age
# DO
max_age = Person.objects.all().aggregate(Max('age'))['age__max']
# ---------------------------------------------------------------------------
# 8. Use values() and values_list() to get only the things you need.
# ---------------------------------------------------------------------------
## 8a. Use values()
# DON'T
age_lookup = {
person.name: person.age
for person in Person.objects.all()
}
# DO
age_lookup = {
person['name']: person['age']
for person in Person.objects.values('name', 'age')
}
## 8b. Use values_list()
# DON'T
person_ids = [person.id for person in Person.objects.all()]
# DO
person_ids = Person.objects.values_list('id', flat=True)
# ---------------------------------------------------------------------------
# 9. Use defer() and only() when you know you won't need certain fields.
#
# * Use when you need a QuerySet instead of a list of dicts from values().
# * Really only useful to defer fields that require significant processing to convert to a python object.
# ---------------------------------------------------------------------------
## 9a. Use defer()
queryset = Person.objects.defer('age') # Imagine age is computationally expensive
for person in queryset:
print(person.id)
print(person.name)
## 9b. Use only()
queryset = Person.objects.only('name')
for person in queryset:
print(person.name)
# ---------------------------------------------------------------------------
# 10. Use count() and exists() when you don't need the contents of the QuerySet.
#
# * Caveat: Only use these when you don't need to evaluate the QuerySet.
# ---------------------------------------------------------------------------
## 10a. Use count()
# DON'T
count = len(Person.objects.all()) # Evaluates the entire queryset
# DO
count = Person.objects.count() # Executes more efficient SQL to determine count
## 10b. Use exists()
# DON'T
exists = len(Person.objects.all()) > 0
# DO
exists = Person.objects.exists()
# ---------------------------------------------------------------------------
# 11. Use delete() and update() when possible.
# ---------------------------------------------------------------------------
## 11a. Use delete()
# DON'T
for person in Person.objects.all():
person.delete()
# DO
Person.objects.all().delete()
## 11b. Use update()
# DON'T
for person in Person.objects.all():
person.age = 0
person.save()
# DO
Person.objects.update(age=0)
# ---------------------------------------------------------------------------
# 12. Use bulk_create() when possible.
#
# * Caveats: https://docs.djangoproject.com/en/2.1/ref/models/querysets/#django.db.models.query.QuerySet.bulk_create
# ---------------------------------------------------------------------------
# Bulk Create
names = ['Jeff', 'Beth', 'Tim']
creates = []
for name in names:
creates.append(
Person(name=name, age=0)
)
Person.objects.bulk_create(creates)
# Bulk add to many-to-many fields
person = Person.objects.get(id=1)
person.jobs.add(job1, job2, job3)
# ---------------------------------------------------------------------------
# 13. Use foreign key values directly.
# ---------------------------------------------------------------------------
# DON'T
father_id = Person.objects.get(id=1).father.id # Causes a needless database query
# DO
father_id = Person.objects.get(id=1).father_id # The foreign key is already cached. No query
@daxaxelrod

This comment has been minimized.

Copy link

daxaxelrod commented Nov 5, 2018

Great write up, thank you. I didn't realize that slices were not cached if the lookup isn't evaluated. Learned a ton.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment