Skip to content

Instantly share code, notes, and snippets.

@Timvrakas
Created November 28, 2021 08:00
Show Gist options
  • Save Timvrakas/2a4f52a6a7fd164cc0cd9a0ba08e33c8 to your computer and use it in GitHub Desktop.
Save Timvrakas/2a4f52a6a7fd164cc0cd9a0ba08e33c8 to your computer and use it in GitHub Desktop.
GlacialFlow
'''Highly Redundant AWS Glacier Multipart Uploader'''
from multiprocessing import Pool
import os
import time
import boto3
import botocore.utils as utils
# Part-size arithmetic: CHUNK_SIZE is the number of bytes in each uploaded
# part and is also the part size declared when a new upload is initiated.
MEGA_BYTES = 1024 * 1024
CHUNK_MB = 32
CHUNK_SIZE = CHUNK_MB * MEGA_BYTES

# Local archive to upload, the vault it goes into, and the description
# attached to a newly created multipart upload.
FILE_PATH = "/mnt/c/Backup/Tim S8 Backup 2017-09-08.rar"
VAULT_NAME = 'Tim-Backup'
ARCHIVE_DESCRIPTION = 'Tim S8 Backup 2017-09-08'
client = boto3.client('glacier')

# Interactive selection of the multipart upload to work on.
# Each pass lists the vault's in-progress uploads and asks the user to either
# pick one by index or type 'N' to create a fresh upload (after which the list
# is shown again so the new upload can be selected). The loop ends once a
# valid index has been chosen; `upload` and `upload_id` are then set.
while True:
    uploads = client.list_multipart_uploads(
        vaultName=VAULT_NAME)['UploadsList']
    for num, upload in enumerate(uploads):
        print('{0}->Resume Upload'.format(num))
        print(' ID: {}'.format(upload['MultipartUploadId']))
        print(' Date: {}'.format(upload['CreationDate']))
        # An upload created without a description must not abort the listing.
        print(' Description: {}'.format(
            upload.get('ArchiveDescription', '')))
    print('N-> Create New Upload')
    input_cmd = input('> ')
    if input_cmd == 'N':
        client.initiate_multipart_upload(
            vaultName=VAULT_NAME,
            archiveDescription=ARCHIVE_DESCRIPTION,
            partSize=str(CHUNK_SIZE))
        continue
    try:
        choice = int(input_cmd)
        # Negative numbers would silently index from the end of the list;
        # only the indexes that were actually displayed are acceptable.
        if choice < 0:
            raise IndexError(choice)
        upload = uploads[choice]
    except (ValueError, IndexError):
        print('Invalid Response')
        continue
    upload_id = upload['MultipartUploadId']
    print("Selected Upload {}".format(choice))
    # Parts are always cut at CHUNK_SIZE, so an upload that was initiated
    # with a different part size cannot be resumed correctly.
    part_size = upload.get('PartSizeInBytes')
    if part_size is not None and int(part_size) != CHUNK_SIZE:
        print('Warning: upload part size {} differs from CHUNK_SIZE {}'.format(
            part_size, CHUNK_SIZE))
    break
# Measure the archive and work out how many CHUNK_SIZE parts cover it.
total_bytes = os.path.getsize(FILE_PATH)
print("File Size: {}".format(total_bytes))
# Round up so that a trailing partial chunk still gets a part of its own.
num_chunks = (total_bytes + CHUNK_SIZE - 1) // CHUNK_SIZE
print("Broken into {} parts, each with {} bytes".format(
    num_chunks, CHUNK_SIZE))
# Indexes of the parts already present in the upload; filled in below.
finished_chunks = set()
# Every part index the file is divided into.
chunks = set(range(num_chunks))
# Walk every page of the upload's part listing and record which chunk
# indexes are already stored, so that only the missing ones get uploaded.
page = client.list_parts(
    vaultName=VAULT_NAME, uploadId=upload_id, limit='1000')
while True:
    listed_parts = page['Parts']
    print("Loading {}/{}...".format(len(finished_chunks), num_chunks), end='\r')
    # 'RangeInBytes' is "<first>-<last>"; the first offset identifies the chunk.
    finished_chunks.update(
        int(entry['RangeInBytes'].split('-')[0]) // CHUNK_SIZE
        for entry in listed_parts)
    # A 'Marker' key signals that another page follows.
    if 'Marker' not in page:
        break
    page = client.list_parts(
        vaultName=VAULT_NAME, uploadId=upload_id,
        marker=page['Marker'], limit='1000')
print("Already Uploaded: {}".format(finished_chunks))
remaining_chunks = chunks - finished_chunks
def upload_chunk(i):
    '''Upload chunk number ``i`` of FILE_PATH to the selected multipart upload.

    Runs inside a pool worker. Reads the byte range covered by chunk ``i``
    (the final chunk may be shorter than CHUNK_SIZE) and sends it with
    ``upload_multipart_part``.

    Returns True when the part was uploaded and False when any exception was
    raised. The exception is reported on stdout together with the chunk index
    so a failure can be attributed when several workers print at once.
    '''
    print("Started Upload of Chunk {}/{}".format(i, num_chunks))
    try:
        byte_start = i * CHUNK_SIZE
        # Inclusive end offset, clamped so the last chunk stops at end of file.
        byte_end = min(byte_start + CHUNK_SIZE, total_bytes) - 1
        range_str = 'bytes {}-{}/{}'.format(byte_start, byte_end, total_bytes)
        with open(FILE_PATH, 'rb') as file:
            file.seek(byte_start)
            data = file.read(CHUNK_SIZE)
        client.upload_multipart_part(
            vaultName=VAULT_NAME, uploadId=upload_id, range=range_str, body=data)
    except Exception as e:
        print("Error! Chunk {}: {}".format(i, e))
        return False
    print("Finished Upload of Chunk {}/{}".format(i, num_chunks))
    return True


# Upload the missing chunks with five worker processes, reporting progress as
# each one finishes.
# NOTE(review): the workers rely on module-level state (client, upload_id,
# num_chunks, ...) being inherited from this process, i.e. the "fork" start
# method. Under "spawn" the unguarded script body would run again in every
# worker - confirm the target platform.
num_completed = num_chunks - len(remaining_chunks)
failed_count = 0
with Pool(processes=5) as pool:
    # A task chunksize of 1 hands chunks to workers one at a time.
    for succeeded in pool.imap_unordered(upload_chunk, remaining_chunks, 1):
        if succeeded:
            num_completed += 1
        else:
            failed_count += 1
        percent = (num_completed / num_chunks) * 100
        print("Completed {}/{} ({:.3f}%)".format(num_completed,
                                                 num_chunks, percent), end='\r')
if failed_count:
    # Stop before the archive is finalised with parts missing; the upload
    # stays open, so running the script again and resuming it retries them.
    raise SystemExit(
        "{} chunk(s) failed to upload; run again and resume upload {}".format(
            failed_count, upload_id))
# Finalise the archive: complete_multipart_upload is given the tree hash of
# the entire file together with its total size in bytes.
print("Calculating Checksum...")
# Open the file in a with-block so the handle is closed once hashing is done.
with open(FILE_PATH, 'rb') as archive:
    checksum = utils.calculate_tree_hash(archive)
print("Checksum: {}".format(checksum))
print("Finishing Upload...")
response = client.complete_multipart_upload(
    vaultName=VAULT_NAME, uploadId=upload_id, checksum=checksum,
    archiveSize=str(total_bytes))
print(response)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment