Skip to content

Instantly share code, notes, and snippets.

@kevinschoon
Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save kevinschoon/c4c7ae2000fb52f0013c to your computer and use it in GitHub Desktop.
S3 Persistence in a Docker Container
#!/usr/bin/env python3
"""
Simple script to maintain persistence between a local file system and an S3 bucket using the s3cmd commandline tool
and asyncio's event loop. A better version of this would likely use the Python Boto library and walk the file system
itself. This will run a job at a configurable interval (default: every five seconds) using the s3cmd client to synchronize a local file system
with S3. This is meant to be run in a Docker volume container. This is NOT meant to store things such as database
files, etc. A better alternative is likely S3Fuse however that project is old and requires privileged access to
the Docker host.
"""
import asyncio
import subprocess
import argparse
import logging
import sys
import signal
import functools
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level='INFO')
class S3CmdException(Exception):
pass
def synchronize(loop, seconds, bucket, file_path):
logging.info('Synchronizing S3 bucket: {} with local file path: {}'.format(bucket, file_path))
with subprocess.Popen(["s3cmd", "sync", "--delete-removed", file_path, bucket], stdout=subprocess.PIPE) as process:
while process.poll() is None:
sys.stdout.write(process.stdout.read().decode('utf-8'))
code = process.poll()
if code != 0:
raise S3CmdException('Caught exception running s3cmd!: Exit code: {}'.format(code))
logging.info('Synchronized.. moving on..')
loop.call_later(seconds, synchronize, loop, seconds, bucket, file_path)
def run(args):
def sig_handler(number):
print('Caught signal {}, shutting down the event loop'.format(number))
loop.stop()
loop = asyncio.get_event_loop()
loop.call_soon(synchronize, loop, args.seconds, args.bucket, args.path)
for sig in ('SIGINT', 'SIGTERM'): # Catch signals 15 and 2 and cleanly shut down the event loop.
loop.add_signal_handler(getattr(signal, sig), functools.partial(sig_handler, sig))
loop.run_forever()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Persistence!')
parser.add_argument('-s', '--seconds', help='Seconds between syncs', default=5)
parser.add_argument('-b', '--bucket', help='S3 bucket to synchronize', default='s3://docker-persistence/')
parser.add_argument('-p', '--path', help='Path to synchronize', default='/data/')
run(parser.parse_args())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment