Skip to content

Instantly share code, notes, and snippets.

@maozza
Created March 27, 2017 22:50
Show Gist options
  • Save maozza/ea46da790982efd189158a82e0bbd461 to your computer and use it in GitHub Desktop.
Save maozza/ea46da790982efd189158a82e0bbd461 to your computer and use it in GitHub Desktop.
Backup to Amazon S3
#! /usr/bin/python
import boto3
import os
from boto3.s3.transfer import S3Transfer
import socket
import argparse
import time
import hashlib
from botocore.exceptions import ClientError
import sys
from time import time
'''
Backup directorie/s to AWS s3.
if file exist with the same content (md5) on AWS it will skip the file.
'''
parser = argparse.ArgumentParser(description='Backup directories to s3')
#parser = OptionParser()
parser.add_argument('-i', '--input', dest='input', help='Files or Directories to backup, can send list seperated with comma', required=True)
parser.add_argument('-b', '--bucket', dest='bucket', help='Which backet to use default is: backup-il-office', default='backup-il-office')
parser.add_argument('-p', '--awsProfile', dest='awsProfile', help='aws use profile from ~/.aws/credentials', default='s3api')
parser.add_argument('-s','--silent',action='store_true',help='verbose flag' )
args = vars(parser.parse_args())
silent=args['silent']
backup_bucket=args['bucket']
if not os.path.isfile(os.path.expanduser('~')+'/.aws/credentials'):
print 'Error: credentials file if missing in ~/.aws/credentials';
sys.exit(1)
'''
Example AWS credentials profile (configure in ~/.aws/credentials):
[s3api]
aws_access_key_id = KEY
aws_secret_access_key = ACCESS_KEY
'''
# Select which ~/.aws/credentials profile boto3 will use (see example above).
os.environ["AWS_PROFILE"] = args['awsProfile']
# Options applied to every uploaded object: server-side encryption (AES256)
# and the Infrequent Access storage class (cheaper for backup-style data).
extra_args={
    'ServerSideEncryption': 'AES256',
    'StorageClass': 'STANDARD_IA',
}
# Objects are keyed under the local hostname, so several machines can share
# one bucket without their backups colliding.
backup_root_dir = socket.gethostname()
# S3 client plus the high-level transfer manager (handles multipart uploads).
client = boto3.client('s3')
transfer = S3Transfer(client)
def getMd5Sum(file):
    '''Return the hexadecimal md5 digest of the file at *file*.

    Reads the file in 64 KiB chunks so large files do not have to fit in
    memory, and closes the handle deterministically via ``with`` (the
    original leaked the handle and slurped the whole file at once).
    '''
    digest = hashlib.md5()
    with open(file, 'rb') as fh:
        for chunk in iter(lambda: fh.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
def checkS3Md5(file, file_md5):
    '''Decide whether *file* must be uploaded to the backup bucket.

    Fetches the object's metadata from S3 and compares the stored "md5"
    value with *file_md5*.  Returns True when an upload is needed: the
    object is missing, carries no md5 metadata, or its md5 differs from
    the local digest.  Returns False when contents already match.
    '''
    try:
        head = client.head_object(Bucket=backup_bucket, Key=file)
        s3_md5 = head['Metadata']['md5']
    except ClientError:
        # Object does not exist in the bucket yet -> upload it.
        return True
    except KeyError:
        # Object exists but has no md5 metadata -> re-upload to stamp it.
        return True
    # Upload only when the local content differs from what S3 holds.
    return file_md5 != s3_md5
def upload(uploadfile,backup_root_dir=backup_root_dir):
stat =dict(
time = time(),
upload = 0,
size = 0,
skip = 0,
error = 0
)
if os.path.isdir(uploadfile):
uploadDir(uploadfile)
return True
if not os.access(uploadfile, os.R_OK):
print "File: " + uploadfile +" not accessible"
stat['error'] = 1
return stat
if uploadfile[0] != '/':
key_name = backup_root_dir+ '/' +uploadfile
else:
key_name = backup_root_dir + uploadfile
f_md5=getMd5Sum(uploadfile)
if checkS3Md5(key_name,f_md5):
extra_args['Metadata']= {"md5":f_md5}
transfer.upload_file(uploadfile, backup_bucket, key_name, extra_args=extra_args)
stat['size'] = os.path.getsize(uploadfile)
stat['upload'] = 1
else:
stat['skip'] = 1
stat['time'] = time() - stat['time']
return stat
def uploadDir(dir,backup_root_dir=backup_root_dir):
'''
upload single directory if needed.
'''
total_stat = dict(
counter = 0,
total_time = 0,
total_uploads =0,
total_size = 0,
total_skip = 0,
total_errors = 0)
if os.path.isdir(dir):
if dir[-1] == '/':
dir=dir[:-1]
for root, dirnames, filenames in os.walk(dir):
for file in filenames:
total_stat['counter']+= 1
uploadfile = root+'/'+file
stat=upload(uploadfile,backup_root_dir=backup_root_dir)
total_stat['total_time']+=stat['time']
total_stat['total_uploads']+=stat['upload']
total_stat['total_size']+=stat['size']
total_stat['total_skip']+=stat['skip']
total_stat['total_errors']+=stat['error']
if not silent:
print ' --Upload file-- : '+ uploadfile
print "Time Taken : " + str(stat['time'])
print "Upload : " + str(stat['upload'])
print "Skipped : " + str(stat['skip'])
print "Error : " + str(stat['error'])
print "Size : " + str(stat['size'])
print '*** S3 Upload '+ dir +' Directory Statistics **** '
print "Total number of files processed : " + str(total_stat['counter'])
print "Total time in seconds : " + str(total_stat['total_time'])
print "Total size : " + humansize(total_stat['total_size'])
print "Total files uploaded : " + str(total_stat['total_uploads'])
print "total files skipped : " + str(total_stat['total_skip'])
print "Total errors : " + str(total_stat['total_errors'])
# Unit labels indexed by power of 1024.
suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']

def humansize(nbytes):
    '''Format a byte count as a short human-readable string.

    e.g. 1536 -> "1.5 KB".  The value is rendered with two decimals and
    trailing zeros (and a bare decimal point) are trimmed.
    '''
    if nbytes == 0:
        return '0 B'
    rank = 0
    value = nbytes
    # Divide down by 1024 until the value fits the current unit, capping
    # at the largest suffix available.
    while value >= 1024 and rank < len(suffixes) - 1:
        value /= 1024.
        rank += 1
    text = ('%.2f' % value).rstrip('0').rstrip('.')
    return '%s %s' % (text, suffixes[rank])
for input in args['input'].split(","):
upload(input)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment