Skip to content

Instantly share code, notes, and snippets.

@aaugustin
Created November 20, 2021 18:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aaugustin/a1d9a5c9cf67a7c34adec2221f5fdb42 to your computer and use it in GitHub Desktop.
Save aaugustin/a1d9a5c9cf67a7c34adec2221f5fdb42 to your computer and use it in GitHub Desktop.
Locate and remove unreferenced media files.
#!/usr/bin/env python
"""
Locate and remove unreferenced media files.
Expects DJANGO_SETTINGS_MODULE to be set in the environment.
"""
import collections
import itertools
import os.path
import django
from django.apps import apps
from django.db import models
from django.db.migrations.serializer import DeconstructableSerializer
# Initialise Django before the model registry is queried further down
# (apps.get_models()); relies on DJANGO_SETTINGS_MODULE as stated in the
# module docstring.
django.setup() # noqa
def storage_key(field):
    """
    Return the grouping key for the storage backend used by ``field``.

    The key is the first element of what ``DeconstructableSerializer``
    produces for ``field.storage``.
    """
    serializer = DeconstructableSerializer(field.storage)
    serialized = serializer.serialize()
    return serialized[0]
def enumerate_database(field):
    """
    Return the file names stored in the database for ``field``.

    Rows are read through the model's ``_base_manager``. The result is a
    flat ``values_list`` of the field's column, one entry per row that
    actually holds a name.

    Empty strings and NULLs are always skipped: neither designates a file.
    ``blank`` only drives form/model validation, so an empty string can be
    stored even when ``blank=False``; a ``null=True`` field yields ``None``.
    """
    objects = field.model._base_manager
    return (
        objects
        .exclude(**{field.name: ''})
        .exclude(**{field.name + '__isnull': True})
        .values_list(field.name, flat=True)
    )
def enumerate_storage(storage, root=''):
    """
    Yield the name of every file found in ``storage`` below ``root``.

    ``storage`` only needs a ``listdir(path)`` method returning a
    ``(directories, files)`` pair. Sub-directories are walked recursively,
    depth first, before the files of the current directory are yielded.

    Names are joined with forward slashes regardless of the host OS so that
    they compare equal to the names stored in the database.
    """
    import posixpath
    import sys

    directories, files = storage.listdir(root)

    # Diagnostic trace: sent to stderr so that stdout carries only the report.
    print('directories ' + root, file=sys.stderr)
    print(directories, file=sys.stderr)
    print('files ' + root, file=sys.stderr)
    print(files, file=sys.stderr)
    print(file=sys.stderr)

    for directory in directories:
        yield from enumerate_storage(storage, posixpath.join(root, directory))
    for file in files:
        yield posixpath.join(root, file)
# Every FileField declared on an installed model.
#
# get_fields() also returns fields inherited from parent models, so a field
# of a concrete parent (or of a proxied model) is seen once per model that
# exposes it. Keying on (owning model, field name) keeps a single entry per
# database column; otherwise each of its files would be counted several
# times and reported under "Multiple references".
file_fields = list(
    {
        (field.model, field.name): field
        for model in apps.get_models()
        for field in model._meta.get_fields()
        if isinstance(field, models.FileField)
    }.values()
)
# Produce one report per distinct storage backend.
#
# itertools.groupby() only merges *adjacent* items with the same key, so the
# fields are sorted by that key first. Without the sort, two fields sharing
# a storage but separated in the list end up in different groups, and files
# referenced by one of them are wrongly listed as unreferenced in the
# other group's report.
for _storage_repr, fields in itertools.groupby(
    sorted(file_fields, key=storage_key), storage_key
):
    fields = list(fields)
    # All fields of the group share the same key; use the first one's storage.
    storage = fields[0].storage

    # Number of database references per file name, across the whole group.
    database_files = collections.Counter()
    for field in fields:
        database_files.update(enumerate_database(field))

    # Build each name set once; both set differences below reuse them.
    database_names = set(database_files)
    storage_names = set(enumerate_storage(storage))

    print("Multiple references")
    print("-------------------")
    print()
    # most_common() is sorted by decreasing count: stop at the first name
    # that is referenced only once.
    for filepath, count in database_files.most_common():
        if count < 2:
            break
        print(count, filepath)
    print()

    print("Missing files")
    print("-------------")
    print()
    for filepath in database_names - storage_names:
        print(filepath)
    print()

    print("Unreferenced files")
    print("------------------")
    print()
    for filepath in storage_names - database_names:
        print(filepath)
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment