Created
April 1, 2024 19:00
-
-
Save mmercedes/6d1b5d2243237bc2e6323523c4b780ea to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Split the contents of every zip archive in an S3 bucket into 5MB chunks.

For each object in BUCKET_NAME whose key ends with '.zip', the archive is
downloaded, each member file is extracted locally, and the member's bytes are
uploaded back to the same bucket as '<member name>.chunk<i>' objects.
Local copies are removed as soon as they are no longer needed.
"""
import os
import zipfile

import boto3

# Change these values to match your setup
BUCKET_NAME = 'your-bucket-name'

# Local scratch directory for extracted members, and the size of each upload.
CHUNK_DIR = 'chunks'
CHUNK_SIZE = 1024 * 1024 * 5  # 5MB

s3 = boto3.resource('s3')
bucket = s3.Bucket(BUCKET_NAME)


def _iter_chunks(stream, size):
    """Yield successive blocks of at most *size* bytes read from *stream*."""
    while True:
        chunk = stream.read(size)
        if not chunk:
            return
        yield chunk


# Create a local directory to store the chunks
os.makedirs(CHUNK_DIR, exist_ok=True)

# For each zip file in the bucket
for obj in bucket.objects.all():
    if not obj.key.endswith('.zip'):
        continue

    # Keys may contain '/' prefixes; download to the basename so no local
    # directory tree is required. Archives are handled one at a time and
    # deleted afterwards, so two keys sharing a basename cannot clash.
    local_zip = os.path.basename(obj.key)

    # Download the zip file
    print(f'Downloading {obj.key}...')
    # The service resource itself has no download_file(); Bucket does.
    bucket.download_file(obj.key, local_zip)

    try:
        # Unzip in chunks
        with zipfile.ZipFile(local_zip, 'r') as zip_ref:
            for member in zip_ref.infolist():
                # Directory entries have no bytes to upload and cannot be
                # opened as files.
                if member.is_dir():
                    continue

                file = member.filename
                print(f'Unzipping {file}...')
                # extract() sanitises the member name (drops '..', leading
                # slashes, ...), so use the path it reports rather than
                # rebuilding it from the raw name.
                extracted_path = zip_ref.extract(member, CHUNK_DIR)

                try:
                    # Break the file into chunks and upload each one
                    with open(extracted_path, 'rb') as data:
                        for i, chunk in enumerate(_iter_chunks(data, CHUNK_SIZE)):
                            print(f'Uploading chunk {i} of {file}...')
                            # NOTE(review): chunk keys are derived only from
                            # the member name, so members with the same name
                            # in different archives overwrite each other.
                            s3.Object(BUCKET_NAME, f'{file}.chunk{i}').put(Body=chunk)
                finally:
                    # Cleanup the extracted file to free up space, even if
                    # an upload failed.
                    os.remove(extracted_path)
    finally:
        # Cleanup the downloaded archive once it is closed.
        os.remove(local_zip)

print('Done!')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment