Skip to content

Instantly share code, notes, and snippets.

@winhamwr
Created March 17, 2010 14:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save winhamwr/335306 to your computer and use it in GitHub Desktop.
Save winhamwr/335306 to your computer and use it in GitHub Desktop.
class BackupArchiveGenerator(Task):
    """
    Task that renders a tenant's changed documents to PDF, zips them up and
    saves the zip through the configured backup storage backend.
    """

    def get_archive_name(self, tenant, date, archive):
        """
        Build and return the archive name based on the requested archive.

        The file stem is ``<date>_to_<today>`` with both dates formatted as
        ``YYYY_MM_DD``; ``_archive`` is appended when ``archive`` is truthy.
        The returned path is that stem plus ``.zip``, joined under
        ``tenant.subdomain``.
        """
        date_format = '%Y_%m_%d'
        filename = '%s_to_%s' % (
            date.strftime(date_format),
            datetime.datetime.today().strftime(date_format),
        )
        if archive:
            filename = "%s_archive" % filename
        return os.path.join(tenant.subdomain, '%s.zip' % filename)

    def get_changed_policies(self, tenant, date, categories, archive):
        """
        Get all policies that have changed according to the given params.

        Returns the tenant's documents modified on or after ``date``,
        restricted to ARCHIVED/OLD documents when ``archive`` is truthy and
        to ACTIVE documents otherwise. When ``categories`` is non-empty the
        result is further limited to documents in those categories.
        """
        # Documents modified since date
        changed_docs = Document.objects.for_tenant(tenant).filter(
            modified__gte=date)
        if archive:
            changed_docs = changed_docs.filter(status__in=[ARCHIVED, OLD])
        else:
            changed_docs = changed_docs.filter(status=ACTIVE)
        if categories:
            changed_docs = changed_docs.filter(category__in=categories)
        return changed_docs

    def run(self, tenant, date, categories, archive=False,
            watermark=True, **kwargs):
        """
        Creates an archive full of PDF versions of all documents on the given
        tenant that have been modified since date and that are in one of the
        departments (``categories``) given.

        Archive has a folder structure corresponding to the department
        structure. ex:
            /Nursing/Code Red.pdf
            /Nursing/Safety.pdf
            /Administration/Time Off.pdf
            /ER/Hand Washing.pdf

        If ``archive`` is ``True``, then only archived and old policies are
        given and the folder structure is organized so that each document
        has its own folder and subfolder with the archive date and the
        policy inside it. ex:
            /Nursing/Code Red/2009_08_07/Code Red.pdf
            /Nursing/Code Red/2007_06_05/Code Red.pdf

        ``watermark`` is forwarded to the PDF generation task and ``kwargs``
        are forwarded to ``get_logger``.

        Returns the storage URL of the saved zip, or ``None`` when there are
        no matching documents or when no PDF could be produced for any of
        them. Temporary directories created along the way are always removed
        before returning.
        """
        # Imported locally so this method does not rely on the module-level
        # import list for the names it newly needs.
        import shutil
        import subprocess

        logger = self.get_logger(**kwargs)
        logger.info(
            "Generating backup for tenant:%s for changes since %s",
            tenant.subdomain, date)

        changed_docs = self.get_changed_policies(
            tenant, date, categories, archive)
        if changed_docs.count() == 0:
            # No docs, so we can't create an archive
            return None

        # Dispatch every PDF job before waiting on any of them.
        pdf_generator = DocumentPdfGenerator()
        doc_results = [
            (d, pdf_generator.delay(d, watermark=watermark))
            for d in changed_docs
        ]

        temp_dir = mkdtemp()
        tmp_zip_dir = None
        try:
            # Put all of the PDFs in the temp dir
            for d, pdf_result in doc_results:
                try:
                    pdf_result.wait(print_settings.PRINTING_TIMEOUT * 100)
                except TimeoutError:
                    pdf_result = None
                if pdf_result is None or pdf_result.status != 'DONE':
                    logger.warning(
                        "PDF generation failed for: %s [%s]", d, d.pk)
                    continue

                # Create the destination directory
                if archive:
                    target_dir = os.path.join(
                        sanitize_filename(d.category.name),
                        sanitize_filename(d.name),
                        d.modified.strftime('%Y_%m_%d'))
                else:
                    target_dir = sanitize_filename(d.category.name)
                tmp_dest = os.path.join(temp_dir, target_dir)
                if not os.path.exists(tmp_dest):
                    os.makedirs(tmp_dest)

                # Get the file and put it at the new location
                f_destination = os.path.join(
                    tmp_dest, '%s.pdf' % sanitize_filename(d.name))
                urllib.urlretrieve(pdf_result.result, f_destination)
                urllib.urlcleanup()

            first_level_dirs = os.listdir(temp_dir)
            if not first_level_dirs:
                # Every PDF job failed or timed out, so there is nothing to
                # zip; bail out instead of invoking zip with no inputs.
                logger.error(
                    "No PDFs could be generated for tenant:%s; "
                    "no archive created", tenant.subdomain)
                return None

            archive_destination = self.get_archive_name(tenant, date, archive)
            zip_file = os.path.basename(archive_destination)

            # Zip up the tmp dir. The zip runs with temp_dir as its working
            # directory (so entries are stored relative to it) without
            # changing this process's own working directory.
            tmp_zip_dir = mkdtemp()
            tmp_zip_file = os.path.join(tmp_zip_dir, zip_file)
            cmd = ['zip', '-qq', '-r', tmp_zip_file]
            cmd.extend(first_level_dirs)
            returncode = subprocess.call(cmd, cwd=temp_dir)
            logger.info("Created zip with cmd: <%s>", repr(cmd))
            if returncode != 0:
                logger.warning("zip exited with status %s", returncode)

            # Store the zip file
            storage_cls = backup_settings.STORAGE_BACKEND
            storage = storage_cls(
                location=backup_settings.STORAGE_DIR,
                base_url=backup_settings.STORAGE_BASE_URL
            )
            logger.info("storing zip temporarily at: %s", tmp_zip_file)
            with open(tmp_zip_file, 'rb') as zip_handle:
                saved_path = storage.save(
                    archive_destination, File(zip_handle))
            stored_url = storage.url(saved_path)
        finally:
            # The temp dirs are only scratch space; never leave them behind.
            shutil.rmtree(temp_dir, ignore_errors=True)
            if tmp_zip_dir is not None:
                shutil.rmtree(tmp_zip_dir, ignore_errors=True)

        logger.info(
            "Backup for tenant:%s for changes since %s generated at: [%s]",
            tenant.subdomain, date, stored_url)
        return stored_url
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment