Skip to content

Instantly share code, notes, and snippets.

@jerinisready
Created February 2, 2022 05:29
Show Gist options
  • Save jerinisready/3e86c3ef331f907ddec31531ca932eb2 to your computer and use it in GitHub Desktop.
Save jerinisready/3e86c3ef331f907ddec31531ca932eb2 to your computer and use it in GitHub Desktop.
This gist shows an optimal way to convert data to csv from foreignkey connected models in Django. I dont have an analytics; but this is purely theoretical analysis.
import csv
from collections import defaultdict
from django.db.models import Prefetch
from django.utils.datetime_safe import datetime
class Property(models.Model):
# id = ... # primary key in model
pass
class SubProperty(models.Model):
# id = ... # primary key in model
# property = Property()
## property_id = Property.id
pass
class GetDataAsCSV(object):
# defined values
_prop_pk_field = 'id' # the primary_key_field_name in property model ; ie Property.pk is 'id'
# the SubProperty.pk in property model should be _prop_pk_field; SubProperty.pk = 'id'
_prop_field_name = 'property' # the property field name in sub-property model
_file_name = None
# required vars
_fields = []
_generated_data = None
@staticmethod
def get_property_ids():
return Property.objects.all().filter(
# if you have any global filter such as user specific properties or datetime or anything.
).values_list('id', flat=True)
def get_properties(self, fields):
return Property.objects.all().filter(
# if you have any global filter such as user specific properties or datetime or anything.
).values(self._prop_pk_field, *fields)
def get_sub_properties(self, property_id_set, fields):
filter_for_selected_props = {
self._prop_field_name: property_id_set
}
return SubProperty.objects.all().filter(
# if you have any global filter such as user specific properties or datetime or anything,
).filter(**filter_for_selected_props).values(self._sub_prop_pk_field, self._prop_field_name, *fields)
@property
def file_name(self):
if self._file_name:
return self._file_name
now = datetime.now()
return "PropertyData_{day}_{month}_{year}_{timestamp}.csv".format(day=now.day, month=now.month, year=now.year, timestamp=now.timestamp())
def __init__(self, fields=None, file_name=None, prop_field_name=None):
self.properties_dataset = None
self.intermediate_data_form = None
self._fields = fields or []
self._prop_field_name = prop_field_name or self._prop_field_name
self.generate_data()
def generate_data(self):
if not self._fields: return
property_ids = self.get_property_ids() # as values_list with flat=True
sub_properties_qs = self.get_sub_properties(property_ids, self._fields)
intermediate_data_form = defaultdict(list)
for sub_prop in sub_properties_qs:
prop_id = sub_prop[self._prop_field_name]
intermediate_data_form[prop_id].append(sub_prop)
self.intermediate_data_form = intermediate_data_form
self.properties_dataset = list(self.get_properties(fields=self._fields))
for prop in self.properties_dataset:
prop[self._prop_field_name] = ''
self.write_to_csv()
def write_to_csv(self):
outfile = self.file_name
ordered_fieldnames = [
self._prop_field_name,
*self._fields
]
ordered_data = []
for prop in self.properties_dataset:
ordered_data.append(prop)
prop_id = prop['id']
if prop_id in self.intermediate_data_form:
ordered_data.extend(self.intermediate_data_form[prop_id])
with open(outfile, 'wb') as fou:
dw = csv.DictWriter(fou, delimiter=',', fieldnames=ordered_fieldnames)
dw.writeheader()
dw.writerows(ordered_data)
if __name__ == "__main__":
GetDataAsCSV(fields=('field_01', 'field_02', 'field_03', 'field_04', ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment