Skip to content

Instantly share code, notes, and snippets.

@elprup
Created October 31, 2012 06:13
Show Gist options
  • Save elprup/3985285 to your computer and use it in GitHub Desktop.
Save elprup/3985285 to your computer and use it in GitHub Desktop.
copy plain log to category log in s3 system
#!/usr/bin/python
'''
s3load
Version 0.2 2012-11-09
Copy plain log files into per-date (dt=YYYY-MM-DD) category folders in S3 using s3cmd.
usage:
s3load.py source destination
'''
import re
import os
import sys
import datetime
import logging
# Lower the root logger threshold to DEBUG so the logging.debug()/logging.info()
# messages emitted by the functions below are not filtered out.
logging.getLogger().setLevel(logging.DEBUG)
def get_file_date(file_path):
    '''
    Extract the log date encoded in a file name.

    The name must end in -<year>-<month>-<day>_<digits>.lzo,
    eg. filename-2012-11-11_0000.lzo
    You can redefine this function to support another naming scheme.

    Returns a datetime.date, or None when the name does not match the
    pattern or the matched numbers do not form a valid calendar date.
    '''
    file_info = re.match(r'^.*-([0-9]+)-([0-9]+)-([0-9]+)_[0-9]+\.lzo$', file_path)
    logging.debug('filecheck: %s, got result %s' % (file_path, file_info))
    if file_info is None:
        logging.debug('filecheck: fail to fetch date')
        return None
    try:
        year, month, day = (int(part) for part in file_info.groups())
        # datetime.date rejects impossible dates (month 13, day 32, year 0,
        # oversized numbers); treat those like any other unparsable name.
        return datetime.date(year, month, day)
    except (ValueError, OverflowError):
        logging.debug('filecheck: fail to initial year,month,day')
        return None
def get_file_list(path):
    '''
    List what is stored under an S3 path and return bare file names.

    Runs "s3cmd ls <path> -r" and, for every output line that has at least
    four whitespace-separated columns, keeps the last '/'-separated
    component of the fourth column. Shorter lines are skipped.

    Returns a list of names without their directory part, in output order.
    '''
    # NOTE: path is interpolated into a shell command unquoted, so it must
    # not contain spaces or shell metacharacters.
    cmd = 's3cmd ls %s -r' % path
    response = os.popen(cmd)
    file_list = []
    try:
        # Iterate the pipe directly: this works on Python 2 and 3, whereas
        # xreadlines() only exists on Python 2 file objects.
        for line in response:
            columns = line.split()
            if len(columns) < 4:
                continue
            file_list.append(columns[3].split('/')[-1])
    finally:
        # Always reap the child process, even if reading fails part-way.
        response.close()
    return file_list
def s3_copy(src, dst):
    '''
    Copy src to dst by running "s3cmd cp src dst".

    The copy is unconditional: this function does not check whether the
    file already exists at the destination.

    Returns the status reported by os.system (0 when the command
    succeeded); a non-zero status is also logged as a warning.
    '''
    cmd = 's3cmd cp %s %s' % (src, dst)
    logging.info(cmd)
    status = os.system(cmd)
    if status != 0:
        # Surface failures instead of silently dropping the exit status.
        logging.warning('command failed with status %s: %s' % (status, cmd))
    return status
def main(argv):
    '''
    Copy every dated log file that is missing from the destination into
    its dt=YYYY-MM-DD folder under the destination root.

    argv: [source_root, dest_root]; any further items are ignored.
    Raises SystemExit with a usage message when either root is missing
    or empty.
    '''
    # s3load.py source_root dest_root
    if len(argv) < 2 or not argv[0] or not argv[1]:
        raise SystemExit('usage: s3load.py source destination')
    source_root, dest_root = argv[0], argv[1]
    if not source_root.endswith('/'):
        source_root += '/'
    if not dest_root.endswith('/'):
        dest_root += '/'
    src_files = get_file_list(source_root)
    dst_files = get_file_list(dest_root)
    # Only bare file names are compared, so a name found anywhere under
    # dest_root counts as already copied. Sorting makes the copy order
    # (and therefore the log) deterministic.
    for name in sorted(set(src_files) - set(dst_files)):
        file_path = source_root + name
        file_date = get_file_date(file_path)
        if file_date is None:
            # No recognisable date in the name: nothing to categorise.
            continue
        s3_copy(file_path, '%sdt=%s/' % (dest_root, file_date.strftime('%Y-%m-%d')))
if __name__ == '__main__':
    # Script entry point: hand everything after the program name to main().
    script_args = sys.argv[1:]
    main(script_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment