Skip to content

Instantly share code, notes, and snippets.

@vryazanov
Last active October 17, 2019 10:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vryazanov/10fc4f7790b56560565e13ad45a70e38 to your computer and use it in GitHub Desktop.
Save vryazanov/10fc4f7790b56560565e13ad45a70e38 to your computer and use it in GitHub Desktop.
S3 Multipart upload with smart_open
import argparse
import concurrent.futures
import threading
import boto3
import smart_open
# Module-level boto3 S3 resource, created once at import time.
# complex_upload_with_threads() uses it to address the target object.
s3 = boto3.resource('s3')
def simple_upload(filepath, to_bucket, to_key, chunk_size=1024 ** 2):
    """Copy ``filepath`` to ``s3://<to_bucket>/<to_key>`` through smart_open.

    The source is read in ``chunk_size``-byte pieces rather than with a
    single ``read()``, so this function never holds more than one chunk of
    the file at a time.

    Args:
        filepath: Path or URL of the source, opened with ``smart_open.open``
            in binary read mode.
        to_bucket: Name of the destination S3 bucket.
        to_key: Key of the destination S3 object.
        chunk_size: Number of bytes requested per read. Defaults to 1 MiB.
    """
    with smart_open.open(filepath, 'rb') as source:
        with smart_open.open(f's3://{to_bucket}/{to_key}', 'wb') as target:
            # iter() with a b'' sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: source.read(chunk_size), b''):
                target.write(chunk)
def complex_upload_with_threads(filepath, to_bucket, to_key, num_of_workers):
    """Upload ``filepath`` to S3 as a multipart upload using a thread pool.

    The source is read sequentially in 5 MiB chunks; each chunk is uploaded
    as one part (numbered from 1) by a worker thread. When every part has
    been uploaded the multipart upload is completed. If reading or any part
    upload fails, the multipart upload is aborted and the error is re-raised
    instead of completing with an incomplete part list.

    An empty source produces no parts; in that case the multipart upload is
    aborted and an empty object is written with a plain ``put``.

    NOTE(review): S3's multipart limits (minimum part size, maximum number
    of parts) are not validated here.

    Args:
        filepath: Path or URL of the source, opened with ``smart_open.open``
            in binary read mode.
        to_bucket: Name of the destination S3 bucket.
        to_key: Key of the destination S3 object.
        num_of_workers: Maximum number of concurrent upload threads.

    Raises:
        Exception: Whatever reading the source or uploading a part raised.
    """
    chunk_size = 5 * 1024 ** 2
    # Cap on submitted-but-unfinished parts, so that a reader faster than
    # the uploads cannot queue the whole file in memory.
    max_in_flight = 2 * num_of_workers

    s3_object = s3.Object(to_bucket, to_key)
    s3_multipart = s3_object.initiate_multipart_upload()

    def _upload(chunk_data, chunk_number):
        """Upload one part and return its entry for the completion request."""
        upload = s3_multipart.Part(chunk_number).upload(Body=chunk_data)
        return {'ETag': upload['ETag'], 'PartNumber': chunk_number}

    try:
        futures = []
        with concurrent.futures.ThreadPoolExecutor(num_of_workers) as executor:
            with smart_open.open(filepath, 'rb') as fileobj:
                chunks = iter(lambda: fileobj.read(chunk_size), b'')
                for chunk_number, chunk_data in enumerate(chunks, start=1):
                    if len(futures) >= max_in_flight:
                        # Block until the part submitted max_in_flight
                        # submissions ago is done; this also surfaces its
                        # failure early instead of reading the whole file.
                        futures[-max_in_flight].result()
                    futures.append(
                        executor.submit(_upload, chunk_data, chunk_number)
                    )
            # Futures are stored in submission order, so the collected
            # parts are already sorted by PartNumber. result() re-raises
            # any exception raised inside a worker.
            parts = [future.result() for future in futures]
        if parts:
            s3_multipart.complete(MultipartUpload={'Parts': parts})
    except BaseException:
        # Also covers KeyboardInterrupt: do not leave uploaded parts behind.
        s3_multipart.abort()
        raise

    if not parts:
        # Nothing was read, so there is nothing to complete: discard the
        # multipart upload and create the empty object directly.
        s3_multipart.abort()
        s3_object.put(Body=b'')
if __name__ == '__main__':
    # Command-line entry point: upload one source to S3, either with the
    # simple smart_open copy or with the threaded multipart upload.
    parser = argparse.ArgumentParser()
    parser.add_argument('origin', help='source file to upload')
    parser.add_argument('to_bucket', help='target s3 bucket')
    parser.add_argument('to_key', help='target s3 key')
    parser.add_argument(
        '--simple',
        action='store_true',
        help='use the simple smart_open upload instead of the threaded one',
    )
    parser.add_argument(
        '--workers',
        type=int,
        default=20,
        help='number of upload threads for the threaded upload (default: 20)',
    )
    args = parser.parse_args()

    if args.workers < 1:
        # Reject this up front with a usage error rather than failing later
        # inside the thread pool constructor.
        parser.error('--workers must be a positive integer')

    if args.simple:
        simple_upload(args.origin, args.to_bucket, args.to_key)
    else:
        complex_upload_with_threads(
            args.origin, args.to_bucket, args.to_key, args.workers,
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment