Skip to content

Instantly share code, notes, and snippets.

@nay-kang
Last active September 6, 2019 15:23
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save nay-kang/afcb3fc14f2d31b3572034c75fc32503 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#pip install boto3
import boto3
import pprint
import os
from botocore.utils import calculate_tree_hash
from botocore.exceptions import ClientError
import time
import argparse
pp = pprint.PrettyPrinter()
def getClient():
    """Return a boto3 Glacier client built from the default credential chain."""
    return boto3.client('glacier')
#upload a file to vault
def upload(path, vault_name):
    """Upload a local file to a Glacier vault using multipart upload.

    path       -- local file path; also stored as the archive description
    vault_name -- target Glacier vault
    Returns a dict with 'archive_id' and 'checksum' of the completed archive.
    """
    part_size = 1024*1024*16  # 16 MiB: a valid power-of-two Glacier part size
    client = getClient()
    filesize = os.stat(path).st_size
    # Open in binary mode: parts and the SHA256 tree hash must be computed
    # over raw bytes (text mode would corrupt data on Windows). The with-
    # statement also fixes the original's leaked file handle.
    with open(path, 'rb') as f:
        re = client.initiate_multipart_upload(
            vaultName=vault_name, partSize=str(part_size), archiveDescription=path)
        upload_id = re['uploadId']
        lastPos = 0
        f.seek(0)
        while lastPos < filesize:
            # last part may be shorter than part_size
            nextPos = lastPos + min(part_size, filesize - lastPos)
            upload_range = 'bytes %s-%s/*' % (lastPos, nextPos - 1)
            upload_data = f.read(nextPos - lastPos)
            client.upload_multipart_part(vaultName=vault_name, uploadId=upload_id,
                                         range=upload_range, body=upload_data)
            lastPos = nextPos
        # Glacier requires the tree hash of the whole file to finalize
        f.seek(0)
        file_checksum = calculate_tree_hash(f)
        re = client.complete_multipart_upload(vaultName=vault_name, uploadId=upload_id,
                                              archiveSize=str(filesize),
                                              checksum=file_checksum)
    return {
        'archive_id': re['archiveId'],
        'checksum': re['checksum']
    }
#fetch file list in vault
def listFiles(write_to_file, vault_name, job_id=None):
    """Start (or resume via job_id) an inventory-retrieval job and save its CSV output."""
    client = getClient()
    if job_id is None:
        # kick off a new inventory job; Glacier serves results asynchronously
        job_params = {
            "Type": "inventory-retrieval",
            "Format": "CSV"
            #"Tier":"Expedited"
        }
        response = client.initiate_job(vaultName=vault_name, jobParameters=job_params)
        job_id = response['jobId']
        print("job_id:%s" % (job_id))
    watchJobAndSaveFile(vault_name, job_id, write_to_file)
#download archive from vault
def downloadArchive(write_to_file, vault_name, archive_id=None, job_id=None):
    """Start (or resume via job_id) an archive-retrieval job and save the archive bytes."""
    client = getClient()
    if job_id is None:
        if archive_id is None:
            raise ValueError('archive_id and job_id can not both be null')
        response = client.initiate_job(vaultName=vault_name, jobParameters={
            "Type": "archive-retrieval",
            "Tier": "Expedited",  # fastest (and most expensive) retrieval tier
            "ArchiveId": archive_id
        })
        job_id = response['jobId']
        print("job_id:%s" % (job_id))
    watchJobAndSaveFile(vault_name, job_id, write_to_file)
#watch job is complete,and download job output
def watchJobAndSaveFile(vault_name, job_id, write_to_file):
    """Poll a Glacier job until completion, then stream its output to a file.

    Outputs of at most 1 MiB are written in a single read; larger outputs
    are fetched with ranged get_job_output calls of 1 MiB each.
    """
    client = getClient()
    # poll once a minute until Glacier marks the job finished
    while True:
        re = client.describe_job(vaultName=vault_name, jobId=job_id)
        if re['Completed']:
            break
        time.sleep(60)
    print("start write file")
    re = client.get_job_output(vaultName=vault_name, jobId=job_id)
    filesize = int(re['ResponseMetadata']['HTTPHeaders']['content-length'])
    part_size = 1024*1024  # 1 MiB download chunks
    # with-statement guarantees the file is closed even if a ranged
    # download raises (the original leaked the handle in that case)
    with open(write_to_file, 'wb') as f:
        if filesize <= part_size:
            f.write(re['body'].read())
        else:
            lastPos = 0
            while lastPos < filesize:
                nextPos = lastPos + min(part_size, filesize - lastPos)
                download_range = 'bytes=%s-%s' % (lastPos, nextPos - 1)
                re = client.get_job_output(vaultName=vault_name, jobId=job_id,
                                           range=download_range)
                f.write(re['body'].read(nextPos - lastPos))
                lastPos = nextPos
                print("download: %s" % (lastPos))
    print("file write to:%s" % (write_to_file))
def backupArchives(folder, vault_name, dry=None):
    """Upload every regular file in *folder* to the vault, then remove it locally.

    folder     -- directory to back up (non-recursive; subdirectories skipped)
    vault_name -- target Glacier vault
    dry        -- when truthy, move uploaded files into a '.delete' subfolder
                  instead of deleting them, so the run can be inspected/undone
    """
    delete_path = os.path.join(folder, '.delete')
    if dry and not os.path.isdir(delete_path):
        os.mkdir(delete_path)
    for filename in os.listdir(folder):
        # Skip hidden dot-files. The original tested filename[1] (the SECOND
        # character), an off-by-one that let '.foo' files through and instead
        # skipped names like 'a.b'.
        if filename[0] == '.':
            continue
        full_path = os.path.join(folder, filename)
        if os.path.isdir(full_path):
            continue
        filesize = os.stat(full_path).st_size
        if filesize == 0:
            # Glacier rejects zero-byte uploads; report and move on
            print("filesize is 0:%s" % (full_path))
            continue
        print('start upload file:%s' % (full_path))
        re = upload(full_path, vault_name)
        print('finish upload file:%s,archive_id:%s' % (full_path, re['archive_id']))
        if dry:
            os.rename(full_path, os.path.join(delete_path, filename))
        else:
            os.remove(full_path)
def parseArgs():
    """Parse command-line arguments and return them as a plain dict."""
    parser = argparse.ArgumentParser()
    parser.add_argument('command', help='backupArchives,listFiles,downloadArchive commands')
    parser.add_argument('vault_name', help='glacier vault_name')
    parser.add_argument('--write_to_file', metavar='', help='download data from glacier and save to file')
    parser.add_argument('--archive_id', metavar='', help='glacier archive_id')
    parser.add_argument('--job_id', metavar='', help='glacier job')
    parser.add_argument('--path', metavar='', help='path upload to glacier vault')
    parser.add_argument('--dry', metavar='', help='not delete file,backup deleted file to .delete folder')
    ns = parser.parse_args()
    # expose the namespace as a dict so callers can index by key
    keys = ('command', 'vault_name', 'write_to_file',
            'archive_id', 'job_id', 'path', 'dry')
    return dict((k, getattr(ns, k)) for k in keys)
if __name__ == "__main__":
    args = parseArgs()
    command = args['command']
    try:
        # dispatch to the requested sub-command
        if command == 'backupArchives':
            backupArchives(args['path'], args['vault_name'], args['dry'])
        elif command == 'listFiles':
            listFiles(args['write_to_file'], args['vault_name'], args['job_id'])
        elif command == 'downloadArchive':
            downloadArchive(args['write_to_file'], args['vault_name'],
                            args['archive_id'], args['job_id'])
        else:
            print("command not found:%s" % (command))
    except ClientError as e:
        # dump the full AWS error payload, then re-raise for the traceback
        pp.pprint(e.response)
        raise
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment