-
-
Save fle/e21100c5f0d0de9aa62e47da68f99017 to your computer and use it in GitHub Desktop.
A faster collectstatic solution for Django and S3-storage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A faster collectstatic solution for Django and S3-storage
by RGOODS
The full story: https://engineering.rgoods.com/django-accelerate-collectstatic-on-aws-s3.html
""" | |
import json | |
from contextlib import contextmanager | |
from itertools import chain | |
from pathlib import Path | |
from django.conf import settings | |
from django.contrib.staticfiles.management.commands.collectstatic import ( | |
Command as DjangoCollectStaticCommand, | |
) | |
from django.utils.module_loading import import_string | |
from storages.backends.s3boto3 import S3ManifestStaticStorage | |
# Dotted path of the storage class instantiated (via import_string) for the
# local collection phase, in place of the configured S3 storage.
BASE_FILE_SYSTEM_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"
# Name of the manifest file, looked up both under STATIC_ROOT and on the remote storage.
MANIFEST_FILENAME = "staticfiles.json"
class Command(DjangoCollectStaticCommand):
    """
    ``collectstatic`` variant that avoids re-uploading unchanged files to S3.

    When the configured static files storage is an ``S3ManifestStaticStorage``,
    static files are first collected locally with ``BASE_FILE_SYSTEM_STORAGE``,
    the freshly built local manifest is compared with the remote one, and only
    new or changed files are uploaded. The local ``STATIC_ROOT`` is cleared
    afterwards. With any other storage the stock Django command runs unchanged.
    """

    # Set by set_options() from the --force-all-resync command line flag.
    force_all_resync: bool

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.static_root = Path(settings.STATIC_ROOT)
        # Decided once, on the storage configured before any temporary swap.
        self.use_s3 = isinstance(self.storage, S3ManifestStaticStorage)

    def add_arguments(self, parser):
        """Register ``--force-all-resync`` in addition to the stock options."""
        super().add_arguments(parser)
        parser.add_argument(
            "--force-all-resync",
            action="store_true",
            dest="force_all_resync",
            help="Force reupload all files to S3.",
        )

    def set_options(self, **options):
        """Store the stock options plus ``force_all_resync`` on the command."""
        super().set_options(**options)
        self.force_all_resync = options["force_all_resync"]

    def _get_local_manifest(self) -> dict:
        """Open and return the local manifest file (json) as a dict."""
        manifest_path = self.static_root.joinpath(MANIFEST_FILENAME)
        with manifest_path.open(encoding="utf-8") as f:
            return json.load(f)

    def _get_remote_manifest(self) -> dict | None:
        """Open and return the remote manifest file (json) as a dict, or None if it doesn't exist."""
        if not self.storage.exists(MANIFEST_FILENAME):
            return None
        with self.storage.open(MANIFEST_FILENAME) as f:
            return json.load(f)

    @staticmethod
    def _compute_diff_manifest_files(
        local_manifest: dict, remote_manifest: dict
    ) -> list:
        """
        Compare local and remote manifests and return the list of differing files.

        An entry of ``local_manifest["paths"]`` differs when the remote manifest
        has no such key or maps it to another value. The result holds only the
        mapped (hashed) names when ``WHITENOISE_KEEP_ONLY_HASHED_FILES`` is
        truthy in settings, otherwise each key immediately followed by its value.
        """
        remote_paths = remote_manifest.get("paths", {})
        diff_manifest = {
            name: hashed_name
            for name, hashed_name in local_manifest["paths"].items()
            if remote_paths.get(name, "") != hashed_name
        }
        if getattr(settings, "WHITENOISE_KEEP_ONLY_HASHED_FILES", False):
            return list(diff_manifest.values())
        return list(chain.from_iterable(diff_manifest.items()))

    def _upload_file(self, file_path: Path, name: str) -> None:
        """Save one local file to the storage under ``name``, or only log it in dry-run mode."""
        if self.dry_run:
            self.log(f" - Pretending to upload '{name}'", level=1)
            return
        with file_path.open("rb") as f:
            self.storage.save(name, f)

    def _upload_files(
        self, must_sync: bool, diff_manifest_files: list[str]
    ) -> tuple[int, int]:
        """
        Iterate on local files and (re-)upload them if needed.

        :param must_sync: upload every file, whatever ``diff_manifest_files`` holds
        :param diff_manifest_files: names of changed files; a local file is
            uploaded when its path relative to STATIC_ROOT starts with one of them
        :return: ``(transferred, untransferred)`` file counts
        """
        transferred = untransferred = 0
        # str.startswith accepts a tuple (an empty one never matches).
        # Prefix matching, as opposed to equality, also selects files whose name
        # merely extends a changed name.
        changed_prefixes = tuple(diff_manifest_files)

        # Iterate on all files collected locally
        for file_path in self.static_root.rglob("*"):
            if file_path.is_dir():
                continue
            # Manifest entries use "/" separators, so compare against the POSIX form
            name = file_path.relative_to(self.static_root).as_posix()
            if name == MANIFEST_FILENAME:
                # The manifest is handled after the loop
                continue
            if not must_sync and not name.startswith(changed_prefixes):
                # The file is not flagged as changed => nothing to do
                untransferred += 1
                continue
            # The file is new or has changed => upload it
            self._upload_file(file_path, name)
            transferred += 1

        manifest_path = self.static_root.joinpath(MANIFEST_FILENAME)
        if must_sync or diff_manifest_files:
            # Upload the manifest exactly once and last, so that the remote
            # manifest is replaced only after the other uploads have completed.
            self._upload_file(manifest_path, MANIFEST_FILENAME)
            transferred += 1
        elif manifest_path.is_file():
            untransferred += 1
        return transferred, untransferred

    def _sync_to_s3(self):
        """
        Compare the manifest that has just been built locally with
        the one possibly existing remotely on S3 and (re-)upload new or changed files.
        """
        self.log(" - Get local and remote manifests and compare them", level=1)
        local_manifest = self._get_local_manifest()
        remote_manifest = self._get_remote_manifest()
        has_remote_manifest = remote_manifest is not None

        # The diff is the list of the changed files
        diff_manifest_files = []
        if has_remote_manifest:
            self.log(" - A manifest already exists remotely", level=1)
            diff_manifest_files = self._compute_diff_manifest_files(
                local_manifest, remote_manifest
            )
        else:
            self.log(" - No manifest exists remotely", level=1)

        # If there is no remote manifest or if the manifest has changed,
        # upload files (new or changed)
        must_sync = self.force_all_resync or not has_remote_manifest
        transferred = untransferred = 0
        if must_sync or diff_manifest_files:
            self.log(
                " - Upload {count} files to S3...".format(
                    count="ALL" if must_sync else len(diff_manifest_files)
                ),
                level=1,
            )
            transferred, untransferred = self._upload_files(
                must_sync, diff_manifest_files
            )
        else:
            self.log(" - No file to upload to S3", level=1)

        # Report what happened
        if has_remote_manifest:
            self.log(
                "{} files have changed: {}".format(
                    len(diff_manifest_files), ", ".join(diff_manifest_files)
                ),
                level=1,
            )
        else:
            self.log("All files have to be uploaded.", level=1)
        if self.force_all_resync:
            self.log("All resync has been forced.", level=1)
        self.log(
            f"{transferred} transferred files to S3, {untransferred} already existing.",
            level=1,
        )

    @contextmanager
    def _force_file_system_storage(self):
        """
        A context manager that temporarily replaces ``self.storage`` with an
        instance of ``BASE_FILE_SYSTEM_STORAGE``.
        """
        original_storage = self.storage
        self.storage = import_string(BASE_FILE_SYSTEM_STORAGE)()
        try:
            yield
        finally:
            # Restore the configured storage even if the body raised
            self.storage = original_storage

    @contextmanager
    def _no_log(self):
        """
        A context manager that temporarily sets ``self.verbosity`` to 0.
        """
        original_verbosity = self.verbosity
        self.verbosity = 0
        try:
            yield
        finally:
            # Restore the verbosity even if the body raised
            self.verbosity = original_verbosity

    def handle(self, **options):
        """
        Override the base command to speed up collectstatic with an S3 backend:

        1. Collect static files locally (with ``BASE_FILE_SYSTEM_STORAGE``)
        2. Sync to S3
        3. Clear the local static root

        Without an S3 backend, only the base command runs.
        """
        # Needed before the first self.log() call; the base handle() sets the
        # options again itself.
        self.set_options(**options)
        if self.use_s3:
            self.log("Collect static files locally...", level=1)
            with self._force_file_system_storage():
                ret = super().handle(**options)
            self.log(ret, level=1)
            self.log("Synchronize with S3...", level=1)
            self._sync_to_s3()
            self.log(f"Clean local {self.static_root.name}...", level=1)
            with self._force_file_system_storage(), self._no_log():
                self.clear_dir(self.static_root)
        else:
            ret = super().handle(**options)
            self.log(ret, level=1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment