# Load in dependencies
import argparse
import datetime
import subprocess
import sys

from boto.s3.connection import S3Connection
from boto.s3.key import Key

S3_BUCKET = '{{S3_BUCKET}}'
S3_ACCESS_KEY_ID = '{{S3_ACCESS_KEY_ID}}'
S3_SECRET_ACCESS_KEY = '{{S3_SECRET_ACCESS_KEY}}'
LATEST_FILENAME = 'latest-dump'
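# DEV: The `{{...}}` values are template placeholders; presumably a deploy-time
#   templating step swaps in the real bucket and credentials before this script runs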


def upload_progress_cb(bytes_transmitted, bytes_total):
    """Helper to output progress of S3 upload"""
    print('Bytes uploaded: {bytes_transmitted}/{bytes_total}'
          .format(bytes_transmitted=bytes_transmitted, bytes_total=bytes_total))
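
# DEV: boto calls `cb` with `(bytes_transmitted, bytes_total)` as the upload progresses;
#   `set_contents_from_string` below accepts it via its `cb` parameter, and its `num_cb`
#   parameter (default `10`) controls roughly how many times it fires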


# Define our script
def main(db_conninfo):
    """
    Upload database dump to S3

    # DEV: We wanted to write this script via `bash` but boto's `s3put` doesn't support stdin

    :param str db_conninfo: Connection string to use when connecting to PostgreSQL
        e.g. `postgresql://localhost:5432/mydb`
    """
    # Create an S3 key to upload our dump into
    # Use same format as our releases (except with `000` right padding for same precision)
    target_filename = datetime.datetime.utcnow().strftime('%Y%m%d.%H%M%S.%f000.sql.gz')
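    # DEV: For example, a dump taken at 2016-01-01 12:34:56.789012 UTC would be named
    #   `20160101.123456.789012000.sql.gz` (`%f` is microseconds; the literal `000`
    #   pads it to the same width as our releases)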
    s3_target_conn = S3Connection(
        S3_ACCESS_KEY_ID,
        S3_SECRET_ACCESS_KEY,
    )
    s3_target_bucket = s3_target_conn.get_bucket(S3_BUCKET)
    s3_target_key = Key(bucket=s3_target_bucket, name=target_filename)
    s3_latest_key = Key(bucket=s3_target_bucket, name=LATEST_FILENAME)

    # Create a child process to dump our database info
    # DEV: We only dump data so we can still use `schema.sql` when loading our dumps
    # DEV: We pipe `stdin/stderr` to this program's `stdin/stderr` to hear any issues from `pg_dump`
    # https://docs.python.org/2/library/subprocess.html
    # DEV: We pipe to `gzip` to minimize amount of data being loaded into Python's memory
    print('Buffering `pg_dump --data-only` into gzip')
    pg_dump_child = subprocess.Popen(['pg_dump', '--data-only', db_conninfo],
                                     stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
    gzip_child = subprocess.Popen(['gzip'],
                                  stdin=pg_dump_child.stdout, stdout=subprocess.PIPE, stderr=sys.stderr)
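    # DEV: Per the pipeline recipe in the `subprocess` docs linked above, close our copy
    #   of `pg_dump`'s stdout so `pg_dump` receives a SIGPIPE if `gzip` exits early
    pg_dump_child.stdout.close()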
    gzip_content = gzip_child.stdout.read()

    # Wait for `pg_dump` to terminate and verify it was a good exit
    exit_code = pg_dump_child.wait()
    if exit_code != 0:
        raise RuntimeError('Expected exit code "0" from `pg_dump` but it was "{code}"'.format(code=exit_code))

    # Wait for `gzip` to terminate and verify it was a good exit
    exit_code = gzip_child.wait()
    if exit_code != 0:
        raise RuntimeError('Expected exit code "0" from `gzip` but it was "{code}"'.format(code=exit_code))

    # Notify user of content we generated
    print('Content buffered and gzipped ({bytes} bytes)'.format(bytes=len(gzip_content)))

    # Upload the dump to S3
    # http://boto.readthedocs.org/en/latest/ref/s3.html#boto.s3.key.Key.set_contents_from_file
    print('Uploading data to S3 "{bucket}/{name}"'.format(bucket=S3_BUCKET, name=target_filename))
    s3_target_key.set_contents_from_string(gzip_content, headers={
        'Content-Type': 'text/plain',
        'Content-Encoding': 'gzip',
    }, cb=upload_progress_cb)
    print('Upload complete!')
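    # DEV: If we wanted to double check the upload, boto's `Key.exists()` could be
    #   called here to confirm the object is now in the bucket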

    # Update the latest dump file on S3 to point to our new file
    print('Marking "{bucket}/{name}" as latest dump via "{bucket}/{latest_name}"'
          .format(bucket=S3_BUCKET, name=target_filename, latest_name=LATEST_FILENAME))
    s3_latest_key.set_contents_from_string(target_filename)
    print('Marking complete!')
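
# DEV: A consumer (e.g. a restore script) can resolve the newest dump via the pointer
#   file. A minimal sketch, assuming the same bucket/credentials and a local filename
#   of our choosing:
#
#       bucket = S3Connection(S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY).get_bucket(S3_BUCKET)
#       latest_name = bucket.get_key(LATEST_FILENAME).get_contents_as_string()
#       bucket.get_key(latest_name).get_contents_to_filename('latest.sql.gz')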


if __name__ == '__main__':
    # Set up our arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'db_conninfo', help='Connection URI for source PostgreSQL (e.g. `postgresql://localhost:5432/mydb`)')

    # Parse them and run it through `main`
    args = parser.parse_args()
    main(**vars(args))
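
# Example invocation (the script filename here is hypothetical; `pg_dump` and `gzip`
#   must be on `PATH`):
#
#     $ python upload_dump_to_s3.py postgresql://localhost:5432/mydb
#     Buffering `pg_dump --data-only` into gzip
#     ...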