Skip to content

Instantly share code, notes, and snippets.

@maozza
Created March 27, 2017 22:50
Show Gist options
  • Save maozza/ea46da790982efd189158a82e0bbd461 to your computer and use it in GitHub Desktop.
Save maozza/ea46da790982efd189158a82e0bbd461 to your computer and use it in GitHub Desktop.
Backup to Amazon S3
#! /usr/bin/python
import boto3
import os
from boto3.s3.transfer import S3Transfer
import socket
import argparse
import time
import hashlib
from botocore.exceptions import ClientError
import sys
from time import time
'''
Backup directorie/s to AWS s3.
if file exist with the same content (md5) on AWS it will skip the file.
'''
parser = argparse.ArgumentParser(description='Backup directories to s3')
#parser = OptionParser()
parser.add_argument('-i', '--input', dest='input', help='Files or Directories to backup, can send list seperated with comma', required=True)
parser.add_argument('-b', '--bucket', dest='bucket', help='Which backet to use default is: backup-il-office', default='backup-il-office')
parser.add_argument('-p', '--awsProfile', dest='awsProfile', help='aws use profile from ~/.aws/credentials', default='s3api')
parser.add_argument('-s','--silent',action='store_true',help='verbose flag' )
args = vars(parser.parse_args())
silent=args['silent']
backup_bucket=args['bucket']
if not os.path.isfile(os.path.expanduser('~')+'/.aws/credentials'):
print 'Error: credentials file if missing in ~/.aws/credentials';
sys.exit(1)
'''
Example AWS credentials profile (configure in ~/.aws/credentials):
[s3api]
aws_access_key_id = KEY
aws_secret_access_key = ACCESS_KEY
'''
# Select which ~/.aws/credentials profile boto3 will use (see example above).
os.environ["AWS_PROFILE"] = args['awsProfile']
# Options applied to every uploaded object: server-side encryption (AES256)
# and the Infrequent Access storage class (cheaper for backup-style data).
extra_args={
    'ServerSideEncryption': 'AES256',
    'StorageClass': 'STANDARD_IA',
}
# Objects are keyed under the local hostname, so several machines can share
# one bucket without their backups colliding.
backup_root_dir = socket.gethostname()
# S3 client plus the high-level transfer manager (handles multipart uploads).
client = boto3.client('s3')
transfer = S3Transfer(client)
def getMd5Sum(file):
    '''Return the hexadecimal md5 digest of the file at *file*.

    Reads the file in 64 KiB chunks so large files do not have to fit in
    memory, and closes the handle deterministically via ``with`` (the
    original leaked the handle and slurped the whole file at once).
    '''
    digest = hashlib.md5()
    with open(file, 'rb') as fh:
        for chunk in iter(lambda: fh.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
def checkS3Md5(file, file_md5):
    '''Decide whether *file* must be uploaded to the backup bucket.

    Fetches the object's metadata from S3 and compares the stored "md5"
    value with *file_md5*.  Returns True when an upload is needed: the
    object is missing, carries no md5 metadata, or its md5 differs from
    the local digest.  Returns False when contents already match.
    '''
    try:
        head = client.head_object(Bucket=backup_bucket, Key=file)
        s3_md5 = head['Metadata']['md5']
    except ClientError:
        # Object does not exist in the bucket yet -> upload it.
        return True
    except KeyError:
        # Object exists but has no md5 metadata -> re-upload to stamp it.
        return True
    # Upload only when the local content differs from what S3 holds.
    return file_md5 != s3_md5
def upload(uploadfile,backup_root_dir=backup_root_dir):
stat =dict(
time = time(),
upload = 0,
size = 0,
skip = 0,
error = 0
)
if os.path.isdir(uploadfile):
uploadDir(uploadfile)
return True
if not os.access(uploadfile, os.R_OK):
print "File: " + uploadfile +" not accessible"
stat['error'] = 1
return stat
if uploadfile[0] != '/':
key_name = backup_root_dir+ '/' +uploadfile
else:
key_name = backup_root_dir + uploadfile
f_md5=getMd5Sum(uploadfile)
if checkS3Md5(key_name,f_md5):
extra_args['Metadata']= {"md5":f_md5}
transfer.upload_file(uploadfile, backup_bucket, key_name, extra_args=extra_args)
stat['size'] = os.path.getsize(uploadfile)
stat['upload'] = 1
else:
stat['skip'] = 1
stat['time'] = time() - stat['time']
return stat
def uploadDir(dir,backup_root_dir=backup_root_dir):
'''
upload single directory if needed.
'''
total_stat = dict(
counter = 0,
total_time = 0,
total_uploads =0,
total_size = 0,
total_skip = 0,
total_errors = 0)
if os.path.isdir(dir):
if dir[-1] == '/':
dir=dir[:-1]
for root, dirnames, filenames in os.walk(dir):
for file in filenames:
total_stat['counter']+= 1
uploadfile = root+'/'+file
stat=upload(uploadfile,backup_root_dir=backup_root_dir)
total_stat['total_time']+=stat['time']
total_stat['total_uploads']+=stat['upload']
total_stat['total_size']+=stat['size']
total_stat['total_skip']+=stat['skip']
total_stat['total_errors']+=stat['error']
if not silent:
print ' --Upload file-- : '+ uploadfile
print "Time Taken : " + str(stat['time'])
print "Upload : " + str(stat['upload'])
print "Skipped : " + str(stat['skip'])
print "Error : " + str(stat['error'])
print "Size : " + str(stat['size'])
print '*** S3 Upload '+ dir +' Directory Statistics **** '
print "Total number of files processed : " + str(total_stat['counter'])
print "Total time in seconds : " + str(total_stat['total_time'])
print "Total size : " + humansize(total_stat['total_size'])
print "Total files uploaded : " + str(total_stat['total_uploads'])
print "total files skipped : " + str(total_stat['total_skip'])
print "Total errors : " + str(total_stat['total_errors'])
# Unit labels indexed by power of 1024.
suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']

def humansize(nbytes):
    '''Format a byte count as a short human-readable string.

    e.g. 1536 -> "1.5 KB".  The value is rendered with two decimals and
    trailing zeros (and a bare decimal point) are trimmed.
    '''
    if nbytes == 0:
        return '0 B'
    rank = 0
    value = nbytes
    # Divide down by 1024 until the value fits the current unit, capping
    # at the largest suffix available.
    while value >= 1024 and rank < len(suffixes) - 1:
        value /= 1024.
        rank += 1
    text = ('%.2f' % value).rstrip('0').rstrip('.')
    return '%s %s' % (text, suffixes[rank])
for input in args['input'].split(","):
upload(input)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment