Skip to content

Instantly share code, notes, and snippets.

@rosdyana
Forked from victorono/remove_duplicates.py
Last active January 15, 2020 07:01
Show Gist options
  • Save rosdyana/6f2998df92270c3cdb250c371ba4f6ac to your computer and use it in GitHub Desktop.
Save rosdyana/6f2998df92270c3cdb250c371ba4f6ac to your computer and use it in GitHub Desktop.
Django - remove duplicate objects where there is more than one field to compare
from django.db.models import Count, Max, Q
from django.db.models.functions import Length
from ma_audience.models import Audience
# Columns whose combined value identifies a group of duplicates.
unique_fields = ['email']

# Start from organization 81's rows that carry a usable email, i.e. neither
# the empty string nor NULL.
duplicates = Audience.objects.filter(
    Q(organization_id=81),
    ~Q(email=''),
    Q(email__isnull=False),
)
# Group by the unique fields; the bare order_by() clears any ordering so it
# does not become part of the grouping.
duplicates = duplicates.values(*unique_fields).order_by()
# For every group, record its highest id and how many rows it holds.
duplicates = duplicates.annotate(max_id=Max('id'), count_id=Count('id'))
# Only groups with more than one row are duplicates.
duplicates = duplicates.filter(count_id__gt=1)
# Within organization 81, delete the rows of each duplicate group whose
# member_sn is shorter than 7 characters, always sparing the row that has
# the group's highest id.
for duplicate in duplicates:
    # Exact-match lookup on every unique field of this group.
    same_values = {field: duplicate[field] for field in unique_fields}
    short_sn_rows = (
        Audience.objects
        .filter(organization_id=81)
        .filter(**same_values)
        .annotate(sn_length=Length('member_sn'))
        .filter(sn_length__lt=7)
    )
    short_sn_rows.exclude(id=duplicate['max_id']).delete()
# For each duplicate group, print the member_sn values of organization 81's
# rows whose member_sn is shorter than 7 characters, leaving out the row
# that has the group's highest id. Nothing is modified here.
for duplicate in duplicates:
    # Exact-match lookup on every unique field of this group.
    same_values = {field: duplicate[field] for field in unique_fields}
    short_sn_rows = (
        Audience.objects
        .filter(organization_id=81)
        .filter(**same_values)
        .annotate(sn_length=Length('member_sn'))
        .filter(sn_length__lt=7)
        .exclude(id=duplicate['max_id'])
    )
    print(short_sn_rows.values('member_sn'))
# For each duplicate group, keep only the row with the highest id and delete
# every other row of the group.
#
# The groups in `duplicates` are computed for organization 81 only, so the
# delete must be scoped to that organization too. Without the organization
# filter, rows of *other* organizations that merely share the same email
# would be deleted as well.
for duplicate in duplicates:
    (
        Audience.objects
        .filter(organization_id=81)
        # Exact-match lookup on every unique field of this group.
        .filter(**{x: duplicate[x] for x in unique_fields})
        # Spare the surviving row of the group.
        .exclude(id=duplicate['max_id'])
        .delete()
    )
from django.db.models import Count, Max
# Columns whose combined value identifies a group of duplicates.
unique_fields = ['field_1', 'field_2']

# Group all rows by the unique fields; the bare order_by() clears any
# ordering so it does not become part of the grouping.
duplicates = MyModel.objects.values(*unique_fields).order_by()
# For every group, record its highest id and how many rows it holds.
duplicates = duplicates.annotate(max_id=Max('id'), count_id=Count('id'))
# Only groups with more than one row are duplicates.
duplicates = duplicates.filter(count_id__gt=1)
# For each duplicate group, delete every row except the one with the
# group's highest id.
for duplicate in duplicates:
    # Exact-match lookup on every unique field of this group.
    group_lookup = {name: duplicate[name] for name in unique_fields}
    surplus_rows = MyModel.objects.filter(**group_lookup).exclude(id=duplicate['max_id'])
    surplus_rows.delete()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment