Skip to content

Instantly share code, notes, and snippets.

@mrts
Created February 25, 2011 16:34
Show Gist options
  • Save mrts/844046 to your computer and use it in GitHub Desktop.
Save mrts/844046 to your computer and use it in GitHub Desktop.
import os
from django.core.management.base import CommandError, LabelCommand
from django.utils.datastructures import SortedDict
from django_commands.utils import parse_apps_and_models
class Command(LabelCommand):
    """Management command that removes orphaned file-field files.

    A file is treated as orphaned when it is listed under the upload path
    but is not referenced by any file field of the given models.
    """

    args = '<upload-path> <appname.Model> [appname.Model] ...>'
    help = ("Cleans orphaned file field files from <upload-path>.\n"
            "'Orphaned' is defined as existing under <upload-path> "
            "but\nnot referenced by any file fields in given "
            "<appname.Model>s.")

    def handle_label(self, label, **options):
        """Remove files under the upload path that no model references.

        ``label`` is split on its first space: the part before it is the
        upload path, the remainder is the model specification handed to
        ``parse_apps_and_models``.

        Raises ``CommandError`` when a model has no file fields, or when
        the database references files that are missing from the on-disk
        listing (in which case nothing is removed).
        """
        upload_path, modelnames = label.split(' ', 1)

        # Order is important to avoid races.
        # Build list of files on disk first, in db second.
        # NOTE(review): list_files() presumably returns a set of paths
        # rooted at upload_path (it is used with set difference below) --
        # confirm against the helper's definition.
        filenames_on_disk = list_files(upload_path)
        filenames_in_database = set()

        for appname, modelname in parse_apps_and_models(modelnames):
            model = get_model_cls(appname, modelname)
            filefields = get_filefields(model)

            if not filefields:
                raise CommandError("Model %s.%s contains no file fields" %
                                   (appname, modelname))

            for filefield in filefields:
                # Skip rows whose file field is the empty string; they do
                # not reference any file.
                kwargs = {filefield: ''}
                qs = model.objects.exclude(**kwargs).values_list(
                    filefield, flat=True)
                # Stored names are joined with upload_path so they can be
                # compared with the on-disk listing.
                filenames_in_database.update(os.path.join(upload_path, name)
                                             for name in qs)

        if not filenames_in_database:
            warn("No files in database")
            return

        # The database "has extra files" when it is NOT a subset of what is
        # on disk. The original check lacked the negation, so it aborted in
        # the safe case and proceeded to delete in the unsafe one.
        db_has_extra_files = not filenames_in_database.issubset(
            filenames_on_disk)

        if db_has_extra_files:
            # Avoid erasing files accidentally.
            # There's a minor race here: subset check may fail if a file is
            # added while the script runs.
            raise CommandError("Database filenames are not a "
                               "subset of actual filenames on disk. Will not risk "
                               "erasing arbitrary files, exiting.")

        dangling_files = filenames_on_disk - filenames_in_database
        remove_files(dangling_files)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment