Skip to content

Instantly share code, notes, and snippets.

@blackrobot
Created November 14, 2019 18:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save blackrobot/e55369080490eadfda84cee06ec3d0a8 to your computer and use it in GitHub Desktop.
Save blackrobot/e55369080490eadfda84cee06ec3d0a8 to your computer and use it in GitHub Desktop.
An example of how to stream complex django querysets without response timeouts | https://stackoverflow.com/a/58828200/28360
"""
This is an example of how to traverse relations and implement complex
django querysets. It's in relation to the follow-up questions asked by
Jérémy Octeau's in the comments on this Stack Overflow post:
https://stackoverflow.com/a/58828200/28360
"""
# Some example models
from django.conf import settings
from django.db import models
class Profile(models.Model):
user = models.OneToOneField(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
phone = models.CharField()
address = models.TextField()
class Magazine(models.Model):
name = models.CharField()
class Subscription(models.Model):
user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
magazine = models.ForeignKey(Magazine, on_delete=models.CASCADE)
created = models.DateField(auto_now_add=True)
renewal_price = models.DecimalField(max_digits=6, decimal_places=2)
# Here's how you might generate the queryset for the csv
from django.contrib.auth import get_user_model
from django.db.models import OuterRef, Subquery
def get_user_export_values():
User = get_user_model()
user_queryset = User.objects.all()
# To get the most recent subscription, you can use a subquery
# https://docs.djangoproject.com/en/2.2/ref/models/expressions/#subquery-expressions
sub_queryset = (
Subscription.objects.all()
.filter(user=OuterRef("pk"))
.order_by("-created")
)
user_queryset = user_queryset.annotate(
latest_sub_created=sub_queryset.values("created")[:1],
latest_sub_renewal_price=sub_queryset.values("renewal_price")[:1],
latest_sub_magazine=sub_queryset.values("magazine__name")[:1],
)
return user_queryset.values_list(
"first_name",
"last_name",
"email",
###
# In your comment, you wrote:
#
# > But when I use the "profile__phone", I will get the ID of the
# > Object Phone. How can I modify that?
#
# This is incorrect. Using "profile__phone" will give you the value
# of `user.profile.phone` if the user has a `profile`, otherwise it
# will be `None`. It's the same for "profile__address" which I've added
# to my example. (The `None` values will appear as the word "None" in
# the csv file -- more on this later.)
#
# If you want the `user.profile.id` as well, you can add "profile__id"
# which I've left commented out below.
###
"profile__phone",
"profile__address",
# "profile__id",
# These are the subquery values added in the `annotate()` method.
"latest_sub_created",
"latest_sub_renewal_price",
"latest_sub_magazine",
# This enables model-like access to each row in the queryset without
# the overhead of loading the whole model.
# https://docs.djangoproject.com/en/2.2/ref/models/querysets/#values-list
named=True,
)
# This is an example of how the queryset values can be formatted before they're
# output in the streaming csv response.
import csv
from django.http import StreamingHttpResponse
class Echo:
__slots__ = ()
def write(self, value):
return value
def streaming_csv_view(request):
echo_buffer = Echo()
csv_writer = csv.writer(echo_buffer)
# This processes each row in the `queryset` so that the row's values can be
# modified before being sent back to the requester.
#
# By using the parenthesis, we can process each row lazily, just before the
# values are streamed in the response. This allows django to start sending
# the response data without having to process the entire queryset at once.
queryset = get_user_export_values()
rendered_rows = (render_row_values(row) for row in queryset)
# Then each row must be processed again to be formatted for csv output.
# Just as before, each row will be processed lazily.
csv_rows = (csv_writer.writerow(row) for row in rendered_rows)
# At this point, the queryset still has not been executed and no rows have
# been rendered or csv formatted yet. This is because all of the
# queryset/row work has been implemented lazily with generators.
response = StreamingHttpResponse(csv_rows, content_type="text/csv")
response["Content-Disposition"] = 'attachment; filename="users.csv"'
return response
def render_row_values(user_row):
"""Render the values from the `user_row` for csv output.
The `user_row` argument is a `namedtuple`:
https://docs.python.org/3/library/collections.html#collections.namedtuple
Since a `namedtuple` is immutable (like regular tuples), the `_replace`
method is used to generate the rendered return value.
"""
replacements = {}
# Render missing `profile.phone` values as empty strings so they don't
# appear as "None"
if not user_row.profile__phone:
replacements["profile__phone"] = ""
# Render missing `profile.address` values as "(no address)"
if not user_row.profile__address:
replacements["profile__address"] = "(no address)"
# Render `subscription.renewal_price` as euros
# - Round it to 2 digits
# - Add english-speaking locale comma formatting
if user_row.latest_sub_renewal_price is not None:
price = round(user_row.latest_sub_renewal_price, ndigits=2)
replacements["latest_sub_renewal_price"] = f"€{price:,f}"
else:
replacements["latest_sub_renewal_price"] = "(N/A)"
# Return a new `namedtuple`, replacing some of the values with the
# formatted versions from `replacements`.
return user_row._replace(**replacements)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment