Created
March 27, 2017 22:50
-
-
Save maozza/ea46da790982efd189158a82e0bbd461 to your computer and use it in GitHub Desktop.
backup to amazon S3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
import boto3 | |
import os | |
from boto3.s3.transfer import S3Transfer | |
import socket | |
import argparse | |
import time | |
import hashlib | |
from botocore.exceptions import ClientError | |
import sys | |
from time import time | |
'''
Back up one or more directories to AWS S3.
If a file already exists on S3 with the same content (md5), it is skipped.
'''
parser = argparse.ArgumentParser(description='Backup directories to s3') | |
#parser = OptionParser() | |
parser.add_argument('-i', '--input', dest='input', help='Files or Directories to backup, can send list seperated with comma', required=True) | |
parser.add_argument('-b', '--bucket', dest='bucket', help='Which backet to use default is: backup-il-office', default='backup-il-office') | |
parser.add_argument('-p', '--awsProfile', dest='awsProfile', help='aws use profile from ~/.aws/credentials', default='s3api') | |
parser.add_argument('-s','--silent',action='store_true',help='verbose flag' ) | |
args = vars(parser.parse_args()) | |
silent=args['silent'] | |
backup_bucket=args['bucket'] | |
if not os.path.isfile(os.path.expanduser('~')+'/.aws/credentials'): | |
print 'Error: credentials file if missing in ~/.aws/credentials'; | |
sys.exit(1) | |
'''
example for S3 profile.
use Amazon S3 profile, configure in ~/.aws/credentials
[s3api]
aws_access_key_id = KEY
aws_secret_access_key = ACCESS_KEY
'''
# Select the AWS profile boto3 will read from ~/.aws/credentials.
os.environ["AWS_PROFILE"] = args['awsProfile']
# Use Server Side encryption, Infrequent Access.
# Applied to every upload; Metadata is attached per-file later.
extra_args={
    'ServerSideEncryption': 'AES256',
    'StorageClass': 'STANDARD_IA',
}
# the hostname is the root directory in the bucket, so several machines
# can share one bucket without key collisions
backup_root_dir = socket.gethostname()
# Get the service client and a managed-transfer wrapper around it
client = boto3.client('s3')
transfer = S3Transfer(client)
def getMd5Sum(file):
    '''
    Return the hex MD5 digest of *file*.

    Reads in fixed-size chunks so large files do not have to fit in
    memory, and closes the file handle deterministically (the original
    left it to the garbage collector).
    '''
    md5 = hashlib.md5()
    with open(file, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            md5.update(chunk)
    return md5.hexdigest()
def checkS3Md5(file, file_md5):
    '''
    Decide whether *file* needs uploading.

    Compares the local md5 digest against the 'md5' metadata stored on
    the S3 object.  Returns True when the object is missing, carries no
    md5 metadata, or its digest differs from *file_md5*; False when the
    remote copy already matches.
    '''
    try:
        # Fetch only the object's metadata from s3 and compare digests.
        remote_md5 = client.head_object(Bucket=backup_bucket, Key=file)['Metadata']['md5']
    except ClientError:
        # Object does not exist on S3 yet -> upload.
        return True
    except KeyError:
        # Object exists but has no md5 metadata -> re-upload to tag it.
        return True
    # Upload only when the local digest differs from the stored one.
    return file_md5 != remote_md5
def upload(uploadfile,backup_root_dir=backup_root_dir): | |
stat =dict( | |
time = time(), | |
upload = 0, | |
size = 0, | |
skip = 0, | |
error = 0 | |
) | |
if os.path.isdir(uploadfile): | |
uploadDir(uploadfile) | |
return True | |
if not os.access(uploadfile, os.R_OK): | |
print "File: " + uploadfile +" not accessible" | |
stat['error'] = 1 | |
return stat | |
if uploadfile[0] != '/': | |
key_name = backup_root_dir+ '/' +uploadfile | |
else: | |
key_name = backup_root_dir + uploadfile | |
f_md5=getMd5Sum(uploadfile) | |
if checkS3Md5(key_name,f_md5): | |
extra_args['Metadata']= {"md5":f_md5} | |
transfer.upload_file(uploadfile, backup_bucket, key_name, extra_args=extra_args) | |
stat['size'] = os.path.getsize(uploadfile) | |
stat['upload'] = 1 | |
else: | |
stat['skip'] = 1 | |
stat['time'] = time() - stat['time'] | |
return stat | |
def uploadDir(dir,backup_root_dir=backup_root_dir): | |
''' | |
upload single directory if needed. | |
''' | |
total_stat = dict( | |
counter = 0, | |
total_time = 0, | |
total_uploads =0, | |
total_size = 0, | |
total_skip = 0, | |
total_errors = 0) | |
if os.path.isdir(dir): | |
if dir[-1] == '/': | |
dir=dir[:-1] | |
for root, dirnames, filenames in os.walk(dir): | |
for file in filenames: | |
total_stat['counter']+= 1 | |
uploadfile = root+'/'+file | |
stat=upload(uploadfile,backup_root_dir=backup_root_dir) | |
total_stat['total_time']+=stat['time'] | |
total_stat['total_uploads']+=stat['upload'] | |
total_stat['total_size']+=stat['size'] | |
total_stat['total_skip']+=stat['skip'] | |
total_stat['total_errors']+=stat['error'] | |
if not silent: | |
print ' --Upload file-- : '+ uploadfile | |
print "Time Taken : " + str(stat['time']) | |
print "Upload : " + str(stat['upload']) | |
print "Skipped : " + str(stat['skip']) | |
print "Error : " + str(stat['error']) | |
print "Size : " + str(stat['size']) | |
print '*** S3 Upload '+ dir +' Directory Statistics **** ' | |
print "Total number of files processed : " + str(total_stat['counter']) | |
print "Total time in seconds : " + str(total_stat['total_time']) | |
print "Total size : " + humansize(total_stat['total_size']) | |
print "Total files uploaded : " + str(total_stat['total_uploads']) | |
print "total files skipped : " + str(total_stat['total_skip']) | |
print "Total errors : " + str(total_stat['total_errors']) | |
# Unit suffixes for humansize(), smallest to largest.
suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
def humansize(nbytes):
    '''
    Format a byte count as a human-readable string.

    e.g. 0 -> '0 B', 1024 -> '1 KB', 1536 -> '1.5 KB'.
    Trailing zeros (and a bare decimal point) are stripped.
    '''
    if nbytes == 0:
        return '0 B'
    value = nbytes
    idx = 0
    # Divide down until the value fits its unit or we run out of units.
    while value >= 1024 and idx < len(suffixes) - 1:
        value /= 1024.
        idx += 1
    formatted = ('%.2f' % value).rstrip('0').rstrip('.')
    return '%s %s' % (formatted, suffixes[idx])
# Entry point: back up every comma-separated path given on the command line.
# Loop variable renamed from `input`, which shadowed the builtin.
for backup_path in args['input'].split(","):
    upload(backup_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment