blackrobot/django_streaming_csv.py

## django_streaming_csv.py
"""
This is an example of how to traverse relations and implement complex
django querysets. It's in relation to the follow-up questions asked by
Jérémy Octeau's in the comments on this Stack Overflow post:
https://stackoverflow.com/a/58828200/28360
"""

# Some example models
from django.conf import settings
from django.db import models

class Profile(models.Model):
    user = models.OneToOneField(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
    phone = models.CharField()
    address = models.TextField()

class Magazine(models.Model):
    name = models.CharField()

class Subscription(models.Model):
    user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
    magazine = models.ForeignKey(Magazine, on_delete=models.CASCADE)
    created = models.DateField(auto_now_add=True)
    renewal_price = models.DecimalField(max_digits=6, decimal_places=2)


# Here's how you might generate the queryset for the csv
from django.contrib.auth import get_user_model
from django.db.models import OuterRef, Subquery

def get_user_export_values():
    User = get_user_model()
    user_queryset = User.objects.all()

    # To get the most recent subscription, you can use a subquery
    # https://docs.djangoproject.com/en/2.2/ref/models/expressions/#subquery-expressions
    sub_queryset = (
        Subscription.objects.all()
        .filter(user=OuterRef("pk"))
        .order_by("-created")
    )

    user_queryset = user_queryset.annotate(
        latest_sub_created=sub_queryset.values("created")[:1],
        latest_sub_renewal_price=sub_queryset.values("renewal_price")[:1],
        latest_sub_magazine=sub_queryset.values("magazine__name")[:1],
    )

    return user_queryset.values_list(
        "first_name",
        "last_name",
        "email",

        ###
        # In your comment, you wrote:
        #
        #   > But when I use the "profile__phone", I will get the ID of the
        #   > Object Phone. How can I modify that?
        #
        # This is incorrect. Using "profile__phone" will give you the value
        # of `user.profile.phone` if the user has a `profile`, otherwise it
        # will be `None`. It's the same for "profile__address" which I've added
        # to my example. (The `None` values will appear as the word "None" in
        # the csv file -- more on this later.)
        #
        # If you want the `user.profile.id` as well, you can add "profile__id"
        # which I've left commented out below.
        ###
        "profile__phone",
        "profile__address",
        # "profile__id",

        # These are the subquery values added in the `annotate()` method.
        "latest_sub_created",
        "latest_sub_renewal_price",
        "latest_sub_magazine",

        # This enables model-like access to each row in the queryset without
        # the overhead of loading the whole model.
        # https://docs.djangoproject.com/en/2.2/ref/models/querysets/#values-list
        named=True,
    )

# This is an example of how the queryset values can be formatted before they're
# output in the streaming csv response.
import csv
from django.http import StreamingHttpResponse

class Echo:
    __slots__ = ()

    def write(self, value):
        return value

def streaming_csv_view(request):
    echo_buffer = Echo()
    csv_writer = csv.writer(echo_buffer)

    # This processes each row in the `queryset` so that the row's values can be
    # modified before being sent back to the requester.
    #
    # By using the parenthesis, we can process each row lazily, just before the
    # values are streamed in the response. This allows django to start sending
    # the response data without having to process the entire queryset at once.
    queryset = get_user_export_values()
    rendered_rows = (render_row_values(row) for row in queryset)

    # Then each row must be processed again to be formatted for csv output.
    # Just as before, each row will be processed lazily.
    csv_rows = (csv_writer.writerow(row) for row in rendered_rows)

    # At this point, the queryset still has not been executed and no rows have
    # been rendered or csv formatted yet. This is because all of the
    # queryset/row work has been implemented lazily with generators.

    response = StreamingHttpResponse(csv_rows, content_type="text/csv")
    response["Content-Disposition"] = 'attachment; filename="users.csv"'
    return response

def render_row_values(user_row):
    """Render the values from the `user_row` for csv output.

    The `user_row` argument is a `namedtuple`:
    https://docs.python.org/3/library/collections.html#collections.namedtuple

    Since a `namedtuple` is immutable (like regular tuples), the `_replace`
    method is used to generate the rendered return value.
    """
    replacements = {}

    # Render missing `profile.phone` values as empty strings so they don't
    # appear as "None"
    if not user_row.profile__phone:
        replacements["profile__phone"] = ""

    # Render missing `profile.address` values as "(no address)"
    if not user_row.profile__address:
        replacements["profile__address"] = "(no address)"

    # Render `subscription.renewal_price` as euros
    # - Round it to 2 digits
    # - Add english-speaking locale comma formatting
    if user_row.latest_sub_renewal_price is not None:
        price = round(user_row.latest_sub_renewal_price, ndigits=2)
        replacements["latest_sub_renewal_price"] = f"€{price:,f}"
    else:
        replacements["latest_sub_renewal_price"] = "(N/A)"

    # Return a new `namedtuple`, replacing some of the values with the
    # formatted versions from `replacements`.
    return user_row._replace(**replacements)
	"""
	This is an example of how to traverse relations and implement complex
	django querysets. It's in relation to the follow-up questions asked by
	Jérémy Octeau's in the comments on this Stack Overflow post:
	https://stackoverflow.com/a/58828200/28360
	"""

	# Some example models
	from django.conf import settings
	from django.db import models

	class Profile(models.Model):
	user = models.OneToOneField(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
	phone = models.CharField()
	address = models.TextField()

	class Magazine(models.Model):
	name = models.CharField()

	class Subscription(models.Model):
	user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
	magazine = models.ForeignKey(Magazine, on_delete=models.CASCADE)
	created = models.DateField(auto_now_add=True)
	renewal_price = models.DecimalField(max_digits=6, decimal_places=2)


	# Here's how you might generate the queryset for the csv
	from django.contrib.auth import get_user_model
	from django.db.models import OuterRef, Subquery

	def get_user_export_values():
	User = get_user_model()
	user_queryset = User.objects.all()

	# To get the most recent subscription, you can use a subquery
	# https://docs.djangoproject.com/en/2.2/ref/models/expressions/#subquery-expressions
	sub_queryset = (
	Subscription.objects.all()
	.filter(user=OuterRef("pk"))
	.order_by("-created")
	)

	user_queryset = user_queryset.annotate(
	latest_sub_created=sub_queryset.values("created")[:1],
	latest_sub_renewal_price=sub_queryset.values("renewal_price")[:1],
	latest_sub_magazine=sub_queryset.values("magazine__name")[:1],
	)

	return user_queryset.values_list(
	"first_name",
	"last_name",
	"email",

	###
	# In your comment, you wrote:
	#
	# > But when I use the "profile__phone", I will get the ID of the
	# > Object Phone. How can I modify that?
	#
	# This is incorrect. Using "profile__phone" will give you the value
	# of `user.profile.phone` if the user has a `profile`, otherwise it
	# will be `None`. It's the same for "profile__address" which I've added
	# to my example. (The `None` values will appear as the word "None" in
	# the csv file -- more on this later.)
	#
	# If you want the `user.profile.id` as well, you can add "profile__id"
	# which I've left commented out below.
	###
	"profile__phone",
	"profile__address",
	# "profile__id",

	# These are the subquery values added in the `annotate()` method.
	"latest_sub_created",
	"latest_sub_renewal_price",
	"latest_sub_magazine",

	# This enables model-like access to each row in the queryset without
	# the overhead of loading the whole model.
	# https://docs.djangoproject.com/en/2.2/ref/models/querysets/#values-list
	named=True,
	)

	# This is an example of how the queryset values can be formatted before they're
	# output in the streaming csv response.
	import csv
	from django.http import StreamingHttpResponse

	class Echo:
	__slots__ = ()

	def write(self, value):
	return value

	def streaming_csv_view(request):
	echo_buffer = Echo()
	csv_writer = csv.writer(echo_buffer)

	# This processes each row in the `queryset` so that the row's values can be
	# modified before being sent back to the requester.
	#
	# By using the parenthesis, we can process each row lazily, just before the
	# values are streamed in the response. This allows django to start sending
	# the response data without having to process the entire queryset at once.
	queryset = get_user_export_values()
	rendered_rows = (render_row_values(row) for row in queryset)

	# Then each row must be processed again to be formatted for csv output.
	# Just as before, each row will be processed lazily.
	csv_rows = (csv_writer.writerow(row) for row in rendered_rows)

	# At this point, the queryset still has not been executed and no rows have
	# been rendered or csv formatted yet. This is because all of the
	# queryset/row work has been implemented lazily with generators.

	response = StreamingHttpResponse(csv_rows, content_type="text/csv")
	response["Content-Disposition"] = 'attachment; filename="users.csv"'
	return response

	def render_row_values(user_row):
	"""Render the values from the `user_row` for csv output.

	The `user_row` argument is a `namedtuple`:
	https://docs.python.org/3/library/collections.html#collections.namedtuple

	Since a `namedtuple` is immutable (like regular tuples), the `_replace`
	method is used to generate the rendered return value.
	"""
	replacements = {}

	# Render missing `profile.phone` values as empty strings so they don't
	# appear as "None"
	if not user_row.profile__phone:
	replacements["profile__phone"] = ""

	# Render missing `profile.address` values as "(no address)"
	if not user_row.profile__address:
	replacements["profile__address"] = "(no address)"

	# Render `subscription.renewal_price` as euros
	# - Round it to 2 digits
	# - Add english-speaking locale comma formatting
	if user_row.latest_sub_renewal_price is not None:
	price = round(user_row.latest_sub_renewal_price, ndigits=2)
	replacements["latest_sub_renewal_price"] = f"€{price:,f}"
	else:
	replacements["latest_sub_renewal_price"] = "(N/A)"

	# Return a new `namedtuple`, replacing some of the values with the
	# formatted versions from `replacements`.
	return user_row._replace(**replacements)