@Yogendra0Sharma
Forked from goliatone/dumpdbs3.py
Created January 12, 2017 08:43
Simple Django management command to dump a database using mysqldump, pg_dump, or sqlite3 .dump, and store the result in S3
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
# from django.core.management.base import CommandError
from optparse import make_option
from django.conf import settings
import os
import sys
import yaml
from boto.s3.connection import S3Connection
from boto.exception import S3ResponseError
from boto.s3.key import Key
from subprocess import Popen, PIPE
# import logging
from time import sleep, time
# log = logging.getLogger(__name__)
"""
Django management command to dump database content and then upload to S3
TODO: Load from config file, refactor. We have a hardcoded parent key,
storage_bucket, that does not match the base object or anything else;
the file is storage_bucket.yaml, as devops decided.
TODO: Rename upload_manager to progress_manager
TODO: Should we be using Popen?
TODO: Get/handle errors from command.
TODO: Decouple action shell commands, use from template?
TODO: Should we name file dumps with DDBB type?
DONE: Rename to dumpdbS3 or something that reflects the fact that it is
stored in S3.
DONE: Do we want to keep a copy? If so, then we need to/could do two uploads,
one with the regular file name, the other with a timestamp. That way only the
regular name would get overwritten and we could always pull the latest by
name.
"""
HELP = ['Takes a snapshot of the database and uploads it to S3',
'Usage:',
"\t\t\tpython manage.py dumpdbs3 -d default -b dump-test -i",
"\t\t\tpython manage.py dumpdbs3 -m create -b creates-this-bucket",
'Config:',
"With the -C option we can take configuration from a config file."
]
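# Example (hypothetical) invocation pulling settings from a config file,
# assuming a storage_bucket.yaml like the one sketched in load_config below:
#   python manage.py dumpdbs3 -C ./storage_bucket.yaml -b dump-test -i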
class Command(BaseCommand):
help = "\n".join(HELP)
option_list = BaseCommand.option_list + (
make_option(
"-d",
"--database",
dest="database",
default='default',
help='Database router id.',
metavar="DATABASE"
),
make_option(
"-b",
"--bucket",
dest="bucket",
default='dumpdb-bucket',
help="S3 bucket name",
metavar="BUCKET"
),
make_option(
"-C",
"--configfile",
dest="configfile",
# default='storage-bucket',
help="Path to yaml file to load config object",
metavar="PATH TO CONFIG"
),
make_option(
"-i",
"--incremental",
dest="incremental",
action='store_true',
help="Keep an incremental build of all dumps, "
"keeping the head always as the latest.",
metavar="INCREMENTAL"
),
make_option(
"-m",
"--bucketfactory",
dest="bucketfactory",
default='get',
help="Method used to retrieve the bucket, "
"either get or create. Different permissions."
" Defaults to 'get'",
metavar="BUCKET METHOD"
),
)
def handle(self, *args, **options):
self.file_name = 'dump.dat'
self.target = options['database']
self.bucket_name = options['bucket']
self.bucket_factory = options['bucketfactory']
self.database_settings = settings.DATABASES[self.target]
if options['configfile']:
print "Configfile: {0}".format(options['configfile'])
self.load_config(options['configfile'])
self.stdout.write("Execute dumpdb command: %s " % self.target)
# Start initial dump.
incremental = options['incremental']
self.start_dump(incremental)
    def load_config(self, path):
        # safe_load avoids executing arbitrary tags in untrusted YAML.
        with open(path, 'r') as config_file:
            config = yaml.safe_load(config_file)
        # TODO: We have this hardcoded, we should fix it!
        # TODO: This is weak! We need to check for stuff,
        # catch errors and what not!!
        config = config['storage_bucket']
        for name in config:
            setattr(self, name, config[name])
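    # A minimal storage_bucket.yaml sketch. The key names below are
    # assumptions; any value nested under storage_bucket simply becomes
    # an attribute on the command (e.g. overriding file_name or bucket_name):
    #
    #   storage_bucket:
    #     bucket_name: dumpdb-bucket
    #     file_name: dump.dat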
def start_dump(self, incremental):
dump = self.dump_database()
if not dump:
            # This usually means that we got an error in the dump process,
            # i.e. an unknown postgres user.
            print "We got no data from the dump. Exiting with error."
            sys.exit(1)
# We store state, using incremental to track first/second
# pass. It will affect how we name our dump file.
self.incremental = incremental
self.file_path = self.get_file_path(incremental)
self.save_dump_to_file(self.file_path, dump)
self.upload_dump_to_S3(self.file_path,
self.bucket_name, self.bucket_factory)
def get_file_path(self, incremental=True):
# timestamp = incremental and str(time()).split('.')[0] or ""
timestamp = str(time()).split('.')[0] + '_' if incremental else ""
return './%s%s' % (timestamp, self.file_name)
def dump_database(self):
print "Dump output ready..."
dumper_method = self.get_dump_method()
return dumper_method(self.database_settings['NAME'])
def get_dump_method(self):
# TODO: Abstract getting settings to method!
database_engine = self.database_settings['ENGINE']
# get the right dumper based on DDBB
dumper_type = self.get_dumper_type(database_engine)
return getattr(self, dumper_type)
    def save_dump_to_file(self, file_name, dump):
        print "Create dump file..."
        with open(file_name, 'w+') as dump_file:
            dump_file.write(dump)
def upload_dump_to_S3(self, file_name, bucket_name, bucket_factory='get'):
print "Pushing %s to S3: %s..." % (file_name, bucket_name)
# Sanitize bucket factory method
if bucket_factory not in ("get", "create"):
bucket_factory = 'get'
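        # With no arguments, boto 2's S3Connection resolves credentials from
        # the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables
        # or a boto config file (e.g. ~/.boto).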
conn = S3Connection()
        try:
            # 'create_bucket' requires create permissions; 'get_bucket' only
            # needs access to an existing bucket. The -m option picks which.
            factory_method = getattr(conn, '%s_bucket' % bucket_factory)
            bucket = factory_method(bucket_name)
except S3ResponseError as e:
self.stdout.write("Error while trying to connect to S3.\n%s" % e)
sys.exit(1)
manager = self.upload_manager(self.upload_done)
k = Key(bucket)
k.key = os.path.basename(file_name)
k.set_contents_from_filename(file_name,
cb=manager, num_cb=20)
def get_postgresql_psycopg2_dump(self, database):
# Generate dump file with droptables and all data.
params = ["sudo", "-u", "postgres",
"pg_dump", "-c", database]
return self.execute_dump(params)
def get_mysql_dump(self, database):
params = ['mysqldump', '--user=root',
database]
return self.execute_dump(params)
    def get_sqlite3_dump(self, database):
        params = ['sqlite3', database,
                  '.dump']
        return self.execute_dump(params)
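    # Note: stderr is not captured below, so a failing dump command only
    # shows up as empty output (handled in start_dump). Piping stderr as
    # well would let us report the actual error (see TODOs above).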
def execute_dump(self, params):
process = Popen(params, stdout=PIPE)
output = process.communicate()[0]
return output
def upload_manager(self, on_done):
"""
Wrap the progress method so that we can bundle a callback
on done. We need this because we can't append the callback
in the progress callback.
"""
        def _upload_progress(loaded=0, total=0, delay=0.4):
            # Guard against a zero-byte transfer.
            percent = int((loaded * 100) / total) if total else 100
            remainder = 100 - percent
output = 'Uploading: [{0}{1}] {2}%\r'.format('#' * percent,
' ' * remainder,
percent)
if percent == 100:
output = output + '\n'
delay = 0
sys.stderr.write(output)
sleep(delay)
# We are done!
if percent == 100:
on_done()
return _upload_progress
def upload_done(self):
        # Remove the local dump file now that the upload has finished.
        os.remove(self.file_path)
if self.incremental:
print "Uploaded first file."
self.start_dump(False)
else:
print "Cleaning up. Done!"
def get_dumper_type(self, engine):
"""
django.db.backends.postgresql_psycopg2: psql
django.db.backends.mysql : mysql
django.db.backends.sqlite3 : sqlite3
"""
p = "django.db.backends."
return 'get_%s_dump' % engine.replace(p, "")