@jeffmylife, forked from teasherm/s3_multipart_upload.py (last active October 21, 2022)
S3 Multipart Upload from request.file
'''
See comments for description & usage instructions.
'''
import os

import boto3

from .config import S3_KEY, S3_SECRET, S3_BUCKET, S3_LOCATION
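# NOTE: .config is a sibling module (not included in this gist) expected to
# define the four constants imported above. A hypothetical sketch, with
# placeholder values only:
#
#     S3_KEY      = "AKIA..."                 # access key id (placeholder)
#     S3_SECRET   = "..."                     # secret key (placeholder)
#     S3_BUCKET   = "my-upload-bucket"
#     S3_LOCATION = "http://my-upload-bucket.s3.amazonaws.com/"  # not used below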

def get_file_size(file):
    # Measure the stream without consuming it: seek to the end, record the
    # offset, then rewind so the upload starts from byte 0.
    file.seek(0, os.SEEK_END)
    file_length = file.tell()
    file.seek(0)
    return file_length

class S3MultipartUpload(object):
    # AWS rejects parts smaller than 5 MiB (the final part is exempt)
    # with an EntityTooSmall error
    PART_MINIMUM = 5 * 1024 * 1024

    def __init__(self, file, part_size=int(15e6)):
        self.bucket = S3_BUCKET
        self.file = file
        self.total_bytes = get_file_size(file)
        self.part_bytes = part_size
        assert part_size >= self.PART_MINIMUM, "part_size must be at least 5 MiB"
        self.s3 = boto3.client(
            "s3",
            aws_access_key_id=S3_KEY,
            aws_secret_access_key=S3_SECRET)
    # Helper
    @staticmethod
    def as_percent(num, denom):
        return float(num) / float(denom) * 100.0
    def abort_all(self):
        # Cancel every in-progress multipart upload in the bucket, freeing
        # the storage held by orphaned parts. Note this is bucket-wide, not
        # limited to this file's uploads.
        mpus = self.s3.list_multipart_uploads(Bucket=self.bucket)
        uploads = mpus.get("Uploads", [])
        aborted = []
        print("Aborting", len(uploads), "uploads")
        for u in uploads:
            aborted.append(
                self.s3.abort_multipart_upload(
                    # Abort each upload under its own key; the listing can
                    # include uploads for objects other than this file.
                    Bucket=self.bucket, Key=u["Key"], UploadId=u["UploadId"]))
        return aborted
    def create(self):
        # Initiate the multipart upload and return its UploadId, which every
        # subsequent part upload must reference
        mpu = self.s3.create_multipart_upload(
            Bucket=self.bucket, Key=self.file.filename)
        return mpu["UploadId"]
    def upload(self, mpu_id):
        # Stream the file in part_bytes-sized chunks. Part numbers start at 1,
        # and each part's ETag must be recorded for complete().
        parts = []
        uploaded_bytes = 0
        self.file.seek(0)
        f = self.file
        i = 1
        while True:
            data = f.read(self.part_bytes)
            if not len(data):
                break
            part = self.s3.upload_part(
                Body=data, Bucket=self.bucket, Key=self.file.filename,
                UploadId=mpu_id, PartNumber=i)
            parts.append({"PartNumber": i, "ETag": part["ETag"]})
            uploaded_bytes += len(data)
            print("{0} of {1} bytes uploaded ({2:.3f}%)".format(
                uploaded_bytes, self.total_bytes,
                self.as_percent(uploaded_bytes, self.total_bytes)))
            i += 1
        self.file.seek(0)
        return parts
    def complete(self, mpu_id, parts):
        # Finish the upload; S3 assembles the object from the listed parts
        result = self.s3.complete_multipart_upload(
            Bucket=self.bucket,
            Key=self.file.filename,
            UploadId=mpu_id,
            MultipartUpload={"Parts": parts})
        return result
    def __call__(self):
        # abort all multipart uploads for this bucket (optional, for starting over)
        self.abort_all()
        # create new multipart upload
        mpu_id = self.create()
        # upload parts
        parts = self.upload(mpu_id)
        # complete multipart upload
        print(self.complete(mpu_id, parts))
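Worth noting: the class isn't tied to Flask. It only needs a file-like object with read/seek/tell and a filename attribute, which werkzeug's FileStorage happens to provide. A minimal standalone sketch, where the NamedFile wrapper is hypothetical and exists only to mimic that interface for a local file:

class NamedFile:
    """Hypothetical wrapper giving a plain binary file handle the .filename
    attribute S3MultipartUpload expects (FileStorage already has one)."""
    def __init__(self, fileobj, filename):
        self._f = fileobj
        self.filename = filename

    def __getattr__(self, name):
        # delegate read/seek/tell to the underlying handle
        return getattr(self._f, name)

if __name__ == "__main__":
    with open("big_video.mp4", "rb") as f:  # placeholder file name
        S3MultipartUpload(NamedFile(f, "big_video.mp4"))()

boto3's managed transfers (s3.upload_fileobj) can also split a stream into multipart uploads automatically; the explicit create/upload/complete dance here is mainly useful when you want per-part control and progress reporting.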
@jeffmylife commented Sep 22, 2020
If you're uploading a file from a client to an S3 bucket through a Flask server and don't want to save the file locally, you might have an HTML form like:

...
<form action="{{ url_for('upload_exp') }}" enctype="multipart/form-data" method="POST">
    <input type="file" name="file">
    <input type="submit" value="Upload">
</form>
...

And a route function decorated with:

@app.route('/upload', methods=['GET', 'POST'])

The snippet below handles the upload logic for uploads of any size. Inside your upload route function you'll need something like:

...
    file = request.files['file']
    if file:
        S3MultipartUpload(file)()
... 
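Putting it together, a complete route might look like the sketch below. Only the endpoint name upload_exp comes from the form above; the import path, the redirect, and the inline fallback form are assumptions:

from flask import Flask, request, redirect, url_for

# hypothetical import path; adjust to wherever the gist file lives in your package
from s3_multipart_upload import S3MultipartUpload

app = Flask(__name__)

@app.route('/upload', methods=['GET', 'POST'])
def upload_exp():
    # minimal sketch; validation, auth, and error handling are omitted
    if request.method == 'POST':
        file = request.files['file']
        if file and file.filename:
            # streams the request file to S3 in parts; nothing is written to disk
            S3MultipartUpload(file)()
            return redirect(url_for('upload_exp'))
    return '''
        <form action="" enctype="multipart/form-data" method="POST">
            <input type="file" name="file">
            <input type="submit" value="Upload">
        </form>
    '''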
