Skip to content

Instantly share code, notes, and snippets.

@fle
Last active February 23, 2024 18:58
Show Gist options
  • Save fle/e21100c5f0d0de9aa62e47da68f99017 to your computer and use it in GitHub Desktop.
Save fle/e21100c5f0d0de9aa62e47da68f99017 to your computer and use it in GitHub Desktop.
A faster collectstatic solution for Django and S3-storage
"""
A faster collectstatic solution for Django and S3-storage
by RGOODS
The full story: https://engineering.rgoods.com/django-accelerate-collectstatic-on-aws-s3.html
"""
import json
from contextlib import contextmanager
from itertools import chain
from pathlib import Path
from django.conf import settings
from django.contrib.staticfiles.management.commands.collectstatic import (
Command as DjangoCollectStaticCommand,
)
from django.utils.module_loading import import_string
from storages.backends.s3boto3 import S3ManifestStaticStorage
# Dotted import path of the local storage class that temporarily replaces the
# configured storage while static files are collected (and later cleaned) on disk.
BASE_FILE_SYSTEM_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"
# Name of the manifest file; it is read from STATIC_ROOT and from the remote
# storage, then compared to find the files that need to be uploaded.
MANIFEST_FILENAME = "staticfiles.json"
class Command(DjangoCollectStaticCommand):
    """
    A `collectstatic` command that avoids re-uploading unchanged files to S3.

    When the configured static files storage is an `S3ManifestStaticStorage`,
    the files are first collected locally with `BASE_FILE_SYSTEM_STORAGE`, the
    local manifest is compared with the remote one, and only the new or changed
    files (or all of them with `--force-all-resync`) are uploaded. With any
    other storage, the stock Django command runs unchanged.
    """

    # Set in `set_options` from the `--force-all-resync` command-line option.
    force_all_resync: bool

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.static_root = Path(settings.STATIC_ROOT)
        self.use_s3 = isinstance(self.storage, S3ManifestStaticStorage)

    def add_arguments(self, parser):
        """Register `--force-all-resync` on top of the standard options."""
        super().add_arguments(parser)
        parser.add_argument(
            "--force-all-resync",
            action="store_true",
            dest="force_all_resync",
            help="Force reupload all files to S3.",
        )

    def set_options(self, **options):
        """Store the standard options plus `force_all_resync` on the command."""
        super().set_options(**options)
        # Default to False so callers that do not pass the option still work.
        self.force_all_resync = options.get("force_all_resync", False)

    def _get_local_manifest(self) -> dict:
        """Open and return the local manifest file (json) as a dict"""
        manifest_path = self.static_root.joinpath(MANIFEST_FILENAME)
        with manifest_path.open(encoding="utf-8") as f:
            return json.load(f)

    def _get_remote_manifest(self) -> dict | None:
        """Open and return the remote manifest file (json) as a dict, or None if it doesn't exist"""
        if not self.storage.exists(MANIFEST_FILENAME):
            return None
        with self.storage.open(MANIFEST_FILENAME) as f:
            return json.load(f)

    @staticmethod
    def _compute_diff_manifest_files(
        local_manifest: dict, remote_manifest: dict
    ) -> list:
        """
        Compare local and remote manifests and return the list of differing files.

        An entry of the local "paths" mapping differs when the remote manifest
        has no such key or maps it to another value. Both the key and the value
        of each differing entry are returned, unless the
        `WHITENOISE_KEEP_ONLY_HASHED_FILES` setting is truthy, in which case
        only the values are returned.
        """
        remote_paths = remote_manifest.get("paths", {})
        diff_manifest = {
            name: hashed_name
            for name, hashed_name in local_manifest["paths"].items()
            if remote_paths.get(name, "") != hashed_name
        }
        if getattr(settings, "WHITENOISE_KEEP_ONLY_HASHED_FILES", False):
            return list(diff_manifest.values())
        return list(chain.from_iterable(diff_manifest.items()))

    def _upload_file(self, name: str, local_path: Path) -> None:
        """Save one local file to the storage as `name`, or only log it in dry-run mode"""
        if self.dry_run:
            self.log(f" - Pretending to upload '{name}'", level=1)
            return
        with local_path.open("rb") as f:
            self.storage.save(name, f)

    def _upload_files(
        self, must_sync: bool, diff_manifest_files: list[str]
    ) -> tuple[int, int]:
        """
        Iterate on local files and (re-)upload them if needed.

        A file is uploaded when `must_sync` is true or when its path, relative
        to STATIC_ROOT, starts with one of `diff_manifest_files`. The prefix
        match also selects files whose name extends a changed name (for
        example a suffix added to it).

        The manifest itself is always uploaded last, so that an interrupted
        run never leaves a remote manifest describing files that were not
        uploaded yet.

        Returns a `(transferred, untransferred)` tuple of file counts.
        """
        transferred = untransferred = 0
        # `str.startswith` accepts a tuple: one call instead of a Python-level loop
        changed_prefixes = tuple(diff_manifest_files)
        manifest_path = self.static_root.joinpath(MANIFEST_FILENAME)
        manifest_exists = manifest_path.is_file()

        # Iterate on all files collected locally
        for file_path in self.static_root.rglob("*"):
            if file_path.is_dir():
                # It's a dir
                # => nothing to do
                continue
            # Storage names and manifest entries use forward slashes, whatever the OS
            name = file_path.relative_to(self.static_root).as_posix()
            if name == MANIFEST_FILENAME:
                # The manifest is handled once, after all the other files
                continue
            if not must_sync and not name.startswith(changed_prefixes):
                # The file already exists remotely and has not changed
                # => nothing to do
                untransferred += 1
                continue
            # The file is new or has changed
            # => upload it
            self._upload_file(name, file_path)
            transferred += 1

        if diff_manifest_files or (must_sync and manifest_exists):
            # Some files have been (re-)uploaded
            # => (re-)upload the manifest, last
            self._upload_file(MANIFEST_FILENAME, manifest_path)
            transferred += 1
        elif manifest_exists:
            untransferred += 1
        return transferred, untransferred

    def _sync_to_s3(self):
        """
        Compare the manifest that has just been built locally with
        the one possibly existing remotely on S3 and (re-)upload new or changed files
        """
        self.log(" - Get local and remote manifests and compare them", level=1)
        # Open local manifest file
        local_manifest = self._get_local_manifest()
        # Open remote manifest file (if any)
        remote_manifest = self._get_remote_manifest()
        has_remote_manifest = remote_manifest is not None

        # Compute the diff between the two manifest files
        # The diff is the list of the changed files
        diff_manifest_files = []
        if has_remote_manifest:
            self.log(" - A manifest already exists remotely", level=1)
            diff_manifest_files = self._compute_diff_manifest_files(
                local_manifest, remote_manifest
            )
        else:
            self.log(" - No manifest exists remotely", level=1)

        # If there is no remote manifest or if the manifest has changed,
        # upload files (new or changed)
        must_sync = self.force_all_resync or not has_remote_manifest
        transferred = untransferred = 0
        if must_sync or diff_manifest_files:
            self.log(
                " - Upload {count} files to S3...".format(
                    count="ALL" if must_sync else len(diff_manifest_files)
                ),
                level=1,
            )
            transferred, untransferred = self._upload_files(
                must_sync, diff_manifest_files
            )
        else:
            self.log(" - No file to upload to S3", level=1)

        # Return some information
        if has_remote_manifest:
            self.log(
                "{} files have changed: {}".format(
                    len(diff_manifest_files), ", ".join(diff_manifest_files)
                ),
                level=1,
            )
        else:
            self.log("All files have to be uploaded.", level=1)
        if self.force_all_resync:
            self.log("All resync has been forced.", level=1)
        self.log(
            f"{transferred} transferred files to S3, {untransferred} already existing.",
            level=1,
        )

    @contextmanager
    def _force_file_system_storage(self):
        """
        A context manager that replaces the static files storage of the command
        by an instance of `BASE_FILE_SYSTEM_STORAGE` (a local storage)
        """
        original_storage = self.storage
        self.storage = import_string(BASE_FILE_SYSTEM_STORAGE)()
        try:
            yield
        finally:
            # Restore the configured storage even if the wrapped code raised
            self.storage = original_storage

    @contextmanager
    def _no_log(self):
        """
        A context manager that disables logs temporarily
        """
        original_verbosity = self.verbosity
        self.verbosity = 0
        try:
            yield
        finally:
            # Restore the verbosity even if the wrapped code raised
            self.verbosity = original_verbosity

    def handle(self, **options):
        """
        Override base command to speed-up collectstatic with a S3 backend
        1. Collect static locally (with the local storage)
        2. Sync to S3
        3. Clean the local static root
        With any other storage, run the base command as is.
        """
        self.set_options(**options)
        if self.use_s3:
            self.log("Collect static files locally...", level=1)
            with self._force_file_system_storage():
                ret = super().handle(**options)
            self.log(ret, level=1)
            self.log("Synchronize with S3...", level=1)
            self._sync_to_s3()
            self.log(f"Clean local {self.static_root.name}...", level=1)
            with self._force_file_system_storage(), self._no_log():
                self.clear_dir(self.static_root)
        else:
            ret = super().handle(**options)
            self.log(ret, level=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment