Skip to content

Instantly share code, notes, and snippets.

@mmercedes
Created April 1, 2024 19:00
Show Gist options
  • Save mmercedes/6d1b5d2243237bc2e6323523c4b780ea to your computer and use it in GitHub Desktop.
Save mmercedes/6d1b5d2243237bc2e6323523c4b780ea to your computer and use it in GitHub Desktop.
import os
import boto3
import zipfile
# Re-upload the contents of every .zip object in an S3 bucket as fixed-size
# chunk objects named '<member name>.chunk<i>' in the same bucket.

# Change these values to match your setup
BUCKET_NAME = 'your-bucket-name'
# Local directory where archive members are extracted before upload
EXTRACT_DIR = 'chunks'
# Size in bytes of each uploaded chunk object
CHUNK_SIZE = 1024 * 1024 * 5  # 5MB

s3 = boto3.resource('s3')
bucket = s3.Bucket(BUCKET_NAME)

# Create a local directory to store the extracted files
os.makedirs(EXTRACT_DIR, exist_ok=True)


def _upload_in_chunks(local_path, key_prefix):
    """Upload the file at local_path in CHUNK_SIZE pieces.

    Each piece is stored in BUCKET_NAME under the key
    '<key_prefix>.chunk<i>', with i counting up from 0. An empty file
    produces no objects.
    """
    with open(local_path, 'rb') as data:
        # iter() with a sentinel stops at the first empty read (end of file)
        for i, chunk in enumerate(iter(lambda: data.read(CHUNK_SIZE), b'')):
            print(f'Uploading chunk {i} of {key_prefix}...')
            s3.Object(BUCKET_NAME, f'{key_prefix}.chunk{i}').put(Body=chunk)


def _process_archive(local_zip):
    """Extract each file member of local_zip, upload it in chunks, delete it."""
    with zipfile.ZipFile(local_zip, 'r') as zip_ref:
        for member in zip_ref.infolist():
            # Directory entries have no content to upload and cannot be
            # opened as files.
            if member.is_dir():
                continue
            print(f'Unzipping {member.filename}...')
            # extract() returns the path it actually wrote, which can differ
            # from EXTRACT_DIR/<name> when the member name is sanitised.
            extracted_path = zip_ref.extract(member, EXTRACT_DIR)
            try:
                _upload_in_chunks(extracted_path, member.filename)
            finally:
                # Remove every extracted file, even on a failed upload, so
                # local disk usage stays bounded by one member at a time.
                os.remove(extracted_path)


# For each zip file in the bucket
for obj in bucket.objects.all():
    if not obj.key.endswith('.zip'):
        continue
    # Keys may contain '/', so download to a flat local name instead of
    # relying on a matching local directory tree existing.
    local_zip = os.path.basename(obj.key)
    print(f'Downloading {obj.key}...')
    try:
        # The S3 service resource has no download_file(); the Bucket does.
        bucket.download_file(obj.key, local_zip)
        _process_archive(local_zip)
    finally:
        # Cleanup the downloaded archive to free up space
        if os.path.exists(local_zip):
            os.remove(local_zip)
print('Done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment