Skip to content

Instantly share code, notes, and snippets.

@harupy
Created March 10, 2023 05:38
Show Gist options
  • Save harupy/cafc16f45a850dad203c11f5e58184fc to your computer and use it in GitHub Desktop.
Save harupy/cafc16f45a850dad203c11f5e58184fc to your computer and use it in GitHub Desktop.
import boto3
import requests
import uuid
import tempfile
from pathlib import Path
import sys
# Demo: upload a file to S3 as a multipart upload using only presigned URLs
# for the data-carrying requests (UploadPart and CompleteMultipartUpload).
# Usage: pass the destination bucket name as the first command-line argument.
if len(sys.argv) < 2:
    # Fail early with a readable message instead of an IndexError after the
    # temporary file has already been created.
    sys.exit(f"usage: {sys.argv[0]} BUCKET")
bucket = sys.argv[1]

with tempfile.TemporaryDirectory() as tmpdir:
    # Create a 25 MiB file by seeking to the last byte and writing a single
    # zero, so the file reaches its full size without writing every byte.
    print("Creating a large file")
    path = Path(tmpdir) / uuid.uuid4().hex
    file_size = 25 * 1024 * 1024  # 25 MiB
    with path.open("wb") as f:
        f.seek(file_size - 1)
        f.write(b"\0")

    s3 = boto3.client("s3")
    key = path.name
    resp = s3.create_multipart_upload(Bucket=bucket, Key=key)
    upload_id = resp["UploadId"]

    try:
        # Generate presigned URLs for each part of the file
        chunk_size = 10 * 1024 * 1024  # 10 MiB per part -> 3 parts for 25 MiB
        # Ceiling division: `size // chunk_size + 1` would add a spurious
        # empty part whenever the size is an exact multiple of chunk_size.
        num_parts = -(-path.stat().st_size // chunk_size)
        urls = []
        for part_number in range(1, num_parts + 1):
            print(f"generating presigned URL for part {part_number}")
            presigned_url = s3.generate_presigned_url(
                "upload_part",
                Params={
                    "Bucket": bucket,
                    "Key": key,
                    "PartNumber": part_number,
                    "UploadId": upload_id,
                },
                ExpiresIn=3600,
                HttpMethod="PUT",
            )
            urls.append(presigned_url)

        # Upload each part, recording the ETag S3 returns for it; the
        # completion request must list every part number with its ETag.
        parts = []
        with path.open("rb") as f:
            for part_number, url in enumerate(urls, start=1):
                print(f"uploading part {part_number}")
                data = f.read(chunk_size)
                resp = requests.put(url, data=data)
                resp.raise_for_status()
                etag = resp.headers["ETag"]
                print(etag)
                parts.append({"PartNumber": part_number, "ETag": etag})

        # Generate presigned URL for CompleteMultipartUpload
        presigned_url = s3.generate_presigned_url(
            "complete_multipart_upload",
            Params={
                "Bucket": bucket,
                "Key": key,
                "UploadId": upload_id,
            },
            ExpiresIn=3600,
            HttpMethod="POST",
        )
        # Build the CompleteMultipartUpload request body by hand, since the
        # request is sent with `requests` rather than through boto3.
        xml = "".join(
            f"<Part><ETag>{part['ETag']}</ETag><PartNumber>{part['PartNumber']}</PartNumber></Part>"
            for part in parts
        )
        xml = f'<CompleteMultipartUpload xmlns="http://s3.amazonaws.com/doc/2006-03-01/">{xml}</CompleteMultipartUpload>'
        # NOTE: per the S3 API reference, CompleteMultipartUpload may return
        # HTTP 200 with an error in the body; raise_for_status() only checks
        # the status code.
        r = requests.post(presigned_url, data=xml)
        r.raise_for_status()
    except BaseException:
        # Abort so already-uploaded parts are not left stored on S3; catch
        # BaseException so Ctrl-C also cleans up, then re-raise unchanged.
        s3.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
        raise
    print("DONE")
@harupy
Copy link
Author

harupy commented Mar 10, 2023

Creating a large file
generating presigned URL for part 1
generating presigned URL for part 2
generating presigned URL for part 3
uploading part 1
"f1c9645dbc14efddc7d8a322685f26eb"
uploading part 2
"f1c9645dbc14efddc7d8a322685f26eb"
uploading part 3
"5f363e0e58a95f06cbe9bbc662c5dfb6"
DONE

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment