Skip to content

Instantly share code, notes, and snippets.

@DamnedScholar
Created March 30, 2021 14:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DamnedScholar/e5c49dfbff05b2f6cd8a5b67081fdbb9 to your computer and use it in GitHub Desktop.
Save DamnedScholar/e5c49dfbff05b2f6cd8a5b67081fdbb9 to your computer and use it in GitHub Desktop.
A class that silently syncs Dropbox -> MEDIA_ROOT, serves locally, and can be as fuzzy, random, or specific as you want to be.
# For relative imports to work in Python 3.6
import os, sys
import jinja2; sys.path.append(os.path.dirname(os.path.realpath(__file__)))
import logging
logger = logging.getLogger('dropbox-fs')
from random import choice, choices, sample
from django.conf import settings
from django.http import HttpResponseRedirect
import arrow
from django_jinja import library as jinja_library
from jinja2.ext import Extension
import dropbox
import fs
from fs._bulk import Copier
from fs.copy import copy_fs_if_newer, copy_structure
from fs.mirror import _mirror
from fs.opener import manage_fs
from fs.path import iteratepath, relpath
from fs.tools import is_thread_safe
from fs.tree import render
from fs.walk import Walker
from fuzzywuzzy import process
import sentry_sdk as sentry
class Mediabox:
    '''
    Integrate the Django media folder with Dropbox.

    The class mirrors a Dropbox folder into `MEDIA_ROOT/dropbox/` and keeps a
    flat index of the mirrored files, so that files deposited by people with
    no access to the site can be looked up (exactly, fuzzily, or at random)
    and served locally.
    '''

    # Minimum fuzzywuzzy score (0-100) a file must reach to count as a match
    # for the random lookups in `get()` and `fuzzy_clump()`.
    _SCORE_CUTOFF = 90

    def __init__(self):
        '''
        Open the local and remote filesystems and build the search index.

        The Dropbox token is read from the `DROPBOX_OAUTH` environment
        variable; `os.environ.get` yields None when it is unset.
        '''
        # NOTE(review): os.path.join is also used for the URL below, so the
        # separator follows the host OS -- fine on POSIX hosts, confirm
        # before deploying elsewhere.
        self.local_root = os.path.join(settings.MEDIA_ROOT, 'dropbox/')
        self.local_url = os.path.join(settings.MEDIA_URL, 'dropbox/')
        self.oauth = os.environ.get('DROPBOX_OAUTH')
        self.api = dropbox.Dropbox(self.oauth)
        self.fs = fs.open_fs(f'osfs://{self.local_root}')
        self.dbfs = fs.open_fs(
            f'dropbox://dropbox.com?access_token={self.oauth}')
        self._index()

    def __del__(self):
        '''
        Ensure opened filesystems are closed.

        `__init__` can fail before either filesystem attribute exists, so
        each one is looked up defensively instead of assuming it was set.
        '''
        for attribute in ('fs', 'dbfs'):
            handle = getattr(self, attribute, None)
            if handle is not None:
                handle.close()

    # To maintain performance, all calls to files should reference the local
    # file tree `self.fs`, which this class will silently attempt to keep in
    # sync with what the Dropbox API returns.
    @property
    def tree(self):
        '''
        Render the local file tree and return a one-line summary of it.

        The tree itself is written by `fs.tree.render` (`file=None`); only
        the directory and file counts it returns end up in the summary.
        '''
        (dirs, files) = render(self.fs, with_color=True, file=None)
        return f'Found {dirs} directories and {files} files.'

    def _fuzzy_matches(self, query):
        '''
        Return every indexed path scoring at least `_SCORE_CUTOFF` for `query`.
        '''
        return [
            entry[0] for entry in
            process.extractWithoutOrder(
                query, self.index, score_cutoff=self._SCORE_CUTOFF)
        ]

    def get(self, query='', path='', random=False):
        '''
        Get the first or a random file according to a query parameter and/or
        path string.

        An exact `path` that is present in the index wins outright. Otherwise
        `query` is fuzzy-matched against the index: the single best match is
        returned, or, when `random` is true, a random pick among the matches
        that reach the score cutoff.

        Raises IndexError when nothing in the index matches.
        '''
        if path and path in self.index:
            return path

        if not random:
            best = process.extractOne(query, self.index)
            if best is None:
                # Previously surfaced as an opaque TypeError from
                # subscripting None.
                raise IndexError(
                    f'No file in the local mirror matches {query!r}.')
            return best[0]

        candidates = self._fuzzy_matches(query)
        if not candidates:
            raise IndexError(
                f'No file in the local mirror matches {query!r}.')
        return choice(candidates)

    def urlize(self, path):
        '''
        Take a PyFS path and convert it into a URL accessible from the
        outside.

        `path` must be relative: `os.path.join` discards `self.local_url`
        when handed an absolute path.
        '''
        return os.path.join(self.local_url, path)

    def get_relative_url(self, **kwargs):
        '''
        Get a specific path or query result in a URL form from the
        perspective of the HTTP server, which is the path at which the file
        is accessible from the outside world.

        Keyword arguments are forwarded unchanged to `get()`.
        '''
        return self.urlize(self.get(**kwargs))

    def fuzzy_clump(self, query='', max=6):
        '''
        Return up to `max` distinct random files that fuzzy-match `query`.

        Sampling is without replacement, so no file is repeated and fewer
        than `max` paths come back when fewer files match. An empty list is
        returned when nothing matches or `max` is not positive.
        '''
        matches = self._fuzzy_matches(query)
        size = min(max, len(matches))
        if size <= 0:
            return []
        return sample(matches, size)

    def mirror(self):
        '''
        Discreetly copies everything from the remote FS into the local one,
        then rebuilds the search index. This method exists so that Celery can
        easily call it and not have to worry about PyFilesystem internals.

        Mirroring also removes local files and directories that are no longer
        present on the remote side.
        '''
        def breadcrumb(src_fs, src_path, dest_fs, dest_path):
            # If Sentry is integrated correctly, logging calls below Error
            # are added as breadcrumbs to submitted transactions.
            logger.info(
                'Copied %s on Dropbox to %s in the local fs.',
                src_path, dest_path,
            )

        # Overriding this function from PyFilesystem so that we can run a
        # lifecycle callback whenever a file is copied.
        def mirror_with_callback(
            src_fs,  # type: Union[FS, Text]
            dst_fs,  # type: Union[FS, Text]
            walker=None,  # type: Optional[Walker]
            copy_if_newer=True,  # type: bool
            workers=0,  # type: int
        ):  # type: (...) -> None
            """Mirror files / directories from one filesystem to another.

            Mirroring a filesystem will create an exact copy of ``src_fs`` on
            ``dst_fs``, by removing any files / directories on the destination
            that aren't on the source, and copying files that aren't.

            Arguments:
                src_fs (FS or str): Source filesystem (URL or instance).
                dst_fs (FS or str): Destination filesystem (URL or instance).
                walker (~fs.walk.Walker, optional): An optional walker
                    instance.
                copy_if_newer (bool): Only copy newer files (the default).
                workers (int): Number of worker threads used
                    (0 for single threaded). Set to a relatively low number
                    for network filesystems, 4 would be a good start.
            """
            def src():
                return manage_fs(src_fs, writeable=False)

            def dst():
                return manage_fs(dst_fs, create=True)

            with src() as _src_fs, dst() as _dst_fs:
                with _src_fs.lock(), _dst_fs.lock():
                    # Worker threads are only used when both filesystems
                    # report themselves as thread safe.
                    _thread_safe = is_thread_safe(_src_fs, _dst_fs)
                    with Copier(
                        num_workers=workers if _thread_safe else 0
                    ) as copier:
                        def on_copy(*args):
                            # Log first, then hand the copy to the Copier.
                            breadcrumb(*args)
                            return copier.copy(*args)

                        _mirror(
                            _src_fs,
                            _dst_fs,
                            walker=walker,
                            copy_if_newer=copy_if_newer,
                            copy_file=on_copy,
                        )

        with sentry.start_transaction(name='Automirror Dropbox'):
            mirror_with_callback(
                self.dbfs,  # Source
                self.fs,  # Destination
                workers=2,  # Threads (0 for single)
            )
            logger.info('Dropbox cloned. Building search index...')
            self._index()
            logger.info('Index complete.')

    def _index(self):
        '''
        Iterate through the local mirror and build a list of all files that
        can be efficiently searched. These files are stored as relative paths
        because other functions use `os.path.join()`, which has specific
        behavior around absolute paths.
        '''
        self.index = [relpath(file) for file in self.fs.walk.files()]
@jinja_library.extension
class MediaboxExtension(Extension):
    '''
    Jinja2 extension that publishes a Mediabox to templates.

    Each environment the extension is attached to receives its own freshly
    constructed `Mediabox`, reachable in templates as the global `mediabox`.
    '''

    def __init__(self, environment):
        '''
        Initialise the base extension, then register the template global.
        '''
        super().__init__(environment)
        environment.globals.update(mediabox=Mediabox())
# Shared Mediabox instance. Constructing it opens the local and Dropbox
# filesystems and builds the file index.
# NOTE(review): assumed to sit at module level (so this runs at import time);
# the original indentation was lost -- confirm.
dbx = Mediabox()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment