-
-
Save nandoquintana/7cf43423693f7b4ded725d95ec9e858a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
#pip install boto3 | |
import boto3 | |
import pprint | |
import os | |
from botocore.utils import calculate_tree_hash | |
from botocore.exceptions import ClientError | |
import time | |
import argparse | |
# module-level pretty-printer, used to dump AWS ClientError payloads on failure
pp = pprint.PrettyPrinter()
def getClient():
    """Return a boto3 Glacier client (configuration comes from the boto3 defaults)."""
    return boto3.client('glacier')
#upload a file to vault
def upload(path, vault_name):
    """Upload a local file to a Glacier vault using multipart upload.

    path       -- local file path; also stored as the archive description
    vault_name -- target Glacier vault name
    Returns a dict with 'archive_id' and 'checksum' of the completed archive.

    Fix over the original: the file handle was opened and never closed
    (leaked on every call, and on any upload error); it is now managed
    with a 'with' block.
    """
    part_size = 1024 * 1024 * 16  # 16 MiB parts; Glacier requires a power-of-2 MiB part size
    client = getClient()
    filesize = os.stat(path).st_size
    re = client.initiate_multipart_upload(
        vaultName=vault_name, partSize=str(part_size), archiveDescription=path)
    upload_id = re['uploadId']
    with open(path, 'rb') as f:
        lastPos = 0
        while lastPos < filesize:
            chunk = min(part_size, filesize - lastPos)
            nextPos = lastPos + chunk
            # Glacier expects 'bytes start-end/*' with an INCLUSIVE end offset
            upload_range = 'bytes %s-%s/*' % (lastPos, nextPos - 1)
            upload_data = f.read(chunk)
            client.upload_multipart_part(
                vaultName=vault_name, uploadId=upload_id,
                range=upload_range, body=upload_data)
            lastPos = nextPos
        # the completion checksum must cover the whole file: rewind first
        f.seek(0)
        file_checksum = calculate_tree_hash(f)
    re = client.complete_multipart_upload(
        vaultName=vault_name, uploadId=upload_id,
        archiveSize=str(filesize), checksum=file_checksum)
    return {
        'archive_id': re['archiveId'],
        'checksum': re['checksum']
    }
#fetch file list in vault | |
def listFiles(write_to_file,vault_name,job_id=None): | |
client = getClient() | |
if job_id is None: | |
re = client.initiate_job(vaultName=vault_name,jobParameters={ | |
"Type":"inventory-retrieval", | |
"Format":"CSV" | |
#"Tier":"Expedited" | |
}) | |
job_id = re['jobId'] | |
print "job_id:%s" % (job_id) | |
#job_id = u'nAbzwh2TWBmVyIlDO4YU1OiZAJD2pJTpIUSrYUSFIM-7oAtRvd_7-8Bm5GV3oj8Dz-QGFch91aQic6ApKhDo2P8wHSz7'; | |
watchJobAndSaveFile(vault_name,job_id,write_to_file) | |
#donwload archive from vault | |
def downloadArchive(write_to_file,vault_name,archive_id=None,job_id=None): | |
client = getClient() | |
if job_id is None: | |
if archive_id is None: | |
raise ValueError('archive_id and job_id can not both be null') | |
re = client.initiate_job(vaultName=vault_name,jobParameters={ | |
"Type":"archive-retrieval", | |
"Tier":"Expedited", | |
"ArchiveId":archive_id | |
}) | |
job_id = re['jobId'] | |
print "job_id:%s" % (job_id) | |
watchJobAndSaveFile(vault_name,job_id,write_to_file) | |
#watch job is complete,and download job output | |
def watchJobAndSaveFile(vault_name,job_id,write_to_file): | |
client = getClient() | |
while True: | |
re = client.describe_job(vaultName=vault_name,jobId=job_id) | |
if re['Completed'] == False: | |
time.sleep(60) | |
else: | |
break; | |
print "start write file" | |
re = client.get_job_output(vaultName=vault_name,jobId=job_id) | |
filesize = re['ResponseMetadata']['HTTPHeaders']['content-length'] | |
filesize = int(filesize) | |
part_size = 1024*1024 | |
f = open(write_to_file,'wb') | |
if filesize<=part_size: | |
body = re['body'] | |
f.write(body.read()) | |
else: | |
lastPos = 0 | |
while lastPos<filesize: | |
nextPos = min(part_size,filesize-lastPos) | |
nextPos = lastPos+nextPos | |
download_range = 'bytes=%s-%s' % (lastPos,nextPos-1) | |
re = client.get_job_output(vaultName=vault_name,jobId=job_id,range=download_range) | |
body = re['body'] | |
f.write(body.read(nextPos-lastPos)) | |
lastPos = nextPos | |
print "download:",lastPos | |
f.close() | |
print "file write to:%s" % (write_to_file) | |
def backupArchives(folder,vault_name,dry=None): | |
deletePath = "%s%s" % (folder,'.delete/') | |
if dry and not os.path.isdir(deletePath): | |
os.mkdir(deletePath) | |
for filename in os.listdir(folder): | |
if filename[1] == '.': | |
continue | |
fullPath = "%s%s" % (folder,filename) | |
if os.path.isdir(fullPath): | |
continue | |
fstat = os.stat(fullPath) | |
filesize = fstat.st_size | |
if filesize==0: | |
print "filesize is 0:%s" % (fullPath) | |
continue | |
print 'start upload file:%s' % (fullPath) | |
re = upload(fullPath,vault_name) | |
print 'finish upload file:%s,archive_id:%s' % (fullPath,re['archive_id']) | |
if dry: | |
os.rename(fullPath,"%s%s" % (deletePath,filename)) | |
else: | |
os.remove(fullPath) | |
def parseArgs():
    """Parse command-line arguments and return them as a plain dict.

    Keys: command, vault_name, write_to_file, archive_id, job_id, path, dry
    (optional flags default to None).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('command', help='backupArchives,listFiles,downloadArchive commands')
    parser.add_argument('vault_name', help='glacier vault_name')
    parser.add_argument('--write_to_file', metavar='', help='download data from glacier and save to file')
    parser.add_argument('--archive_id', metavar='', help='glacier archive_id')
    parser.add_argument('--job_id', metavar='', help='glacier job')
    parser.add_argument('--path', metavar='', help='path upload to glacier vault')
    parser.add_argument('--dry', metavar='', help='not delete file,backup deleted file to .delete folder')
    # vars() on the Namespace yields exactly the {arg_name: value} dict
    return vars(parser.parse_args())
if __name__ == "__main__": | |
args = parseArgs() | |
try: | |
if args['command'] == 'backupArchives': | |
backupArchives(args['path'],args['vault_name'],args["dry"]) | |
elif args['command'] == 'listFiles': | |
listFiles(args['write_to_file'],args['vault_name'],args['job_id']) | |
elif args['command'] == 'downloadArchive': | |
downloadArchive(args['write_to_file'],args['vault_name'],args['archive_id'],args['job_id']) | |
else: | |
print "command not found:%s" % (args['command']) | |
except ClientError as e: | |
pp.pprint(e.response) | |
raise | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment