Skip to content

Instantly share code, notes, and snippets.

Last active August 29, 2015 14:24
Show Gist options
  • Save guewen/4792a54a3a58e10ea454 to your computer and use it in GitHub Desktop.
Save guewen/4792a54a3a58e10ea454 to your computer and use it in GitHub Desktop.
Sync logs from S3
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Download new log files from S3.

New files are compressed with gzip, and only new files are downloaded.
"""
from __future__ import print_function
import argparse
import gzip
import os
import shutil
import subprocess
import tempfile
from contextlib import contextmanager
@contextmanager
def cd(path):
    """Temporarily change the process working directory to *path*.

    Intended for use in a ``with`` statement.  The directory that was
    current on entry is restored on exit, including when the body raises.

    :param path: directory to switch to for the duration of the block
    :raises OSError: if *path* cannot be entered
    """
    cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Always go back, otherwise a failure inside the block would leave
        # the whole process in an unexpected directory.
        os.chdir(cwd)
class LogSync(object):
    """Fetch log files from an S3 path and archive them locally as gzip.

    A remote file is downloaded only when ``<name>.gz`` does not already
    exist in the local logs directory.  Downloads go through the ``s3cmd``
    command-line tool, which must be available on ``PATH``.

    :param s3_path: S3 prefix to list, e.g. ``s3://logs-erp/xyz/``
    :param config_file: path of the ``s3cmd`` configuration file
    :param logs_path: local directory receiving the ``.gz`` archives
    """

    def __init__(self, s3_path, config_file, logs_path):
        self.s3_path = s3_path
        self.config_file = config_file
        self.logs_path = logs_path
        self._tmpdir = None

    @property
    def tmpdir(self):
        """Scratch directory for downloads, created on first access."""
        if not self._tmpdir:
            self._tmpdir = tempfile.mkdtemp()
            print('Downloading temporary files in %s' % self._tmpdir)
        return self._tmpdir

    def _s3cmd(self, *arguments, **popen_kwargs):
        """Run ``s3cmd`` with the configured config file; return its output.

        The config path is made absolute because the command may run with a
        different working directory (see ``cwd`` in ``_fetch_and_compress``).
        ``universal_newlines`` makes the output text on Python 2 and 3 alike.

        :raises subprocess.CalledProcessError: if s3cmd exits non-zero
        """
        command = ['s3cmd', '-c', os.path.abspath(self.config_file)]
        command.extend(arguments)
        return subprocess.check_output(command, universal_newlines=True,
                                       **popen_kwargs)

    def _parse_listing(self, listing):
        """Turn ``s3cmd ls`` output into ``(s3_url, local_name)`` pairs.

        Lines are expected as ``DATE TIME SIZE URL``.  Lines with fewer
        fields (blank lines, ``DIR`` entries) and zero-sized objects are
        ignored.

        :param listing: text printed by ``s3cmd ls``
        :returns: list of ``(s3_url, local_name)`` tuples
        """
        entries = []
        for line in listing.splitlines():
            # maxsplit keeps URLs containing spaces in one piece
            fields = line.split(None, 3)
            if len(fields) < 4 or fields[2] == '0':
                continue
            s3file = fields[3].rstrip()
            if s3file.startswith(self.s3_path):
                filename = s3file[len(self.s3_path):]
            else:
                filename = s3file.rsplit('/', 1)[-1]
            entries.append((s3file, filename))
        return entries

    def _fetch_and_compress(self, s3file, filename, archive_dest):
        """Download one object, gzip it and move it to *archive_dest*."""
        workdir = self.tmpdir
        compressed_name = filename + '.gz'
        raw_path = os.path.join(workdir, filename)
        gz_path = os.path.join(workdir, compressed_name)
        # s3cmd writes the object into its working directory; running it
        # with cwd= avoids changing the directory of this process.
        self._s3cmd('get', s3file, cwd=workdir)
        print('Compressing file to %s' % compressed_name)
        with open(raw_path, 'rb') as fh:
            with gzip.open(gz_path, 'wb') as gz:
                shutil.copyfileobj(fh, gz)
        os.remove(raw_path)
        shutil.move(gz_path, archive_dest)
        print('Got a new archived log: %s' % archive_dest)

    def _cleanup(self):
        """Delete the scratch directory, if one was created."""
        if self._tmpdir:
            shutil.rmtree(self._tmpdir, ignore_errors=True)
            self._tmpdir = None

    def sync(self):
        """Download and archive every remote log not yet present locally.

        :raises subprocess.CalledProcessError: if an s3cmd call fails
        """
        listing = self._s3cmd('ls', self.s3_path)
        try:
            for s3file, filename in self._parse_listing(listing):
                archive_dest = os.path.join(self.logs_path, filename + '.gz')
                if os.path.exists(archive_dest):
                    # already archived by a previous run
                    continue
                self._fetch_and_compress(s3file, filename, archive_dest)
        finally:
            self._cleanup()
if __name__ == '__main__':

    class S3URL(argparse.Action):
        """Store the S3 path, making sure it ends with a slash."""

        def __call__(self, parser, args, values, option_string=None):
            if not values.endswith('/'):
                values += '/'
            setattr(args, self.dest, values)

    class AbsolutePath(argparse.Action):
        """Store a filesystem path in absolute form."""

        def __call__(self, parser, args, values, option_string=None):
            values = os.path.abspath(values)
            setattr(args, self.dest, values)

    parser = argparse.ArgumentParser()

    group = parser.add_argument_group('Local')
    group.add_argument('-o', '--out', required=True,
                       action=AbsolutePath,
                       help="Directory where the compressed logs are stored")

    group = parser.add_argument_group('S3')
    # Absolute so the config is still found if the working directory changes
    group.add_argument('-c', '--config-file', required=True,
                       action=AbsolutePath,
                       help='S3 configuration file')
    group.add_argument('-p', '--s3-path', required=True,
                       action=S3URL,
                       help='S3 Path, example: s3://logs-erp/xyz/')

    args = parser.parse_args()
    log_sync = LogSync(args.s3_path, args.config_file, args.out)
    # Building the object alone does nothing; run the synchronisation
    log_sync.sync()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment