Skip to content

Instantly share code, notes, and snippets.

@ripiuk
Last active August 7, 2023 17:23
Show Gist options
  • Save ripiuk/29da12bbfef629dbe8b4c27cdf5891ec to your computer and use it in GitHub Desktop.
Save ripiuk/29da12bbfef629dbe8b4c27cdf5891ec to your computer and use it in GitHub Desktop.
A tool for downloading all files under an S3 prefix ("folder") to local storage using asyncio, aiobotocore and aiofiles.
import os
import time
import asyncio
import aiobotocore
import aiofiles
# AWS credentials. They are read from the standard AWS environment variables
# so that secrets do not have to be written into this file; each falls back to
# an empty string (the original placeholder value) when the variable is unset.
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID', '')  # aws access key
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY', '')  # aws secret key

# Timeouts handed to the aiobotocore client configuration.
# NOTE(review): botocore documents these values as seconds - confirm that a
# 500 second timeout is really intended.
READ_TIMEOUT = 500
CONNECT_TIMEOUT = 500
async def go(loop):
    """Download every object below a hard-coded S3 prefix to local storage.

    Lists all keys under ``folder`` in ``bucket``, downloads them concurrently
    with ``download_file`` and prints a progress line after each finished
    download, followed by the total elapsed time in seconds.

    Args:
        loop: Event loop that is passed on to ``aiobotocore.get_session``.
    """
    bucket = 'bucket-example'  # your bucket name
    folder = 'Example/The_folder'  # path to folder in s3
    local_dir = '/home/some_user_name/data_from_s3/'  # where do you want to download the files (local storage)
    region_name = 'us-west-2'

    # A monotonic clock is not affected by system clock adjustments, which
    # makes it the right tool for measuring elapsed time.
    start = time.monotonic()

    # NOTE(review): newer aiobotocore releases appear to expose get_session
    # from aiobotocore.session and to drop the ``loop`` argument - confirm
    # against the installed version before upgrading.
    session = aiobotocore.get_session(loop=loop)
    configuration = aiobotocore.config.AioConfig(
        read_timeout=READ_TIMEOUT,
        connect_timeout=CONNECT_TIMEOUT,
    )
    async with session.create_client(
        's3',
        region_name=region_name,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        config=configuration,
    ) as client:
        # The listing is paginated, so walk every page to collect all keys.
        keys = []
        paginator = client.get_paginator('list_objects')
        async for page in paginator.paginate(Bucket=bucket, Prefix=folder):
            keys.extend(entry['Key'] for entry in page.get('Contents', []))

        total_files = len(keys)
        downloads = [download_file(client, bucket, local_dir, key) for key in keys]
        # as_completed yields awaitables in completion order, so progress is
        # reported as soon as any download finishes.
        for finished_count, finished in enumerate(asyncio.as_completed(downloads), start=1):
            await finished
            print('{} of {} files downloaded'.format(finished_count, total_files))

    print(time.monotonic() - start)
async def download_file(client, bucket: str, local_dir: str, key: str):
    """Fetch one S3 object and store it below ``local_dir`` at its key path.

    Args:
        client: S3 client object providing an awaitable ``get_object``.
        bucket: Name of the bucket that holds the object.
        local_dir: Local directory under which the key's path is recreated.
        key: Key of the object to download. A key ending in ``/`` is treated
            as a directory placeholder: the directory is created and nothing
            is downloaded.
    """
    # Leading slashes are dropped so that os.path.join cannot discard
    # local_dir; joining (instead of ``+``) also works when local_dir has no
    # trailing slash.
    target_path = os.path.join(local_dir, key.lstrip('/'))

    if key.endswith('/'):
        # "Folder" placeholder objects cannot be written as files (opening
        # the path would fail with IsADirectoryError), so only mirror the
        # directory locally.
        os.makedirs(target_path, exist_ok=True)
        return

    response = await client.get_object(Bucket=bucket, Key=key)
    async with response['Body'] as stream:
        downloaded_bytes = await stream.read()

    download_path = os.path.dirname(target_path)
    if download_path:  # os.makedirs('') raises, so skip an empty directory part
        os.makedirs(download_path, exist_ok=True)
    async with aiofiles.open(target_path, 'wb+') as file:
        await file.write(downloaded_bytes)
# Script entry point: run the download job to completion.
# The loop is created explicitly because calling asyncio.get_event_loop()
# without a running loop is deprecated, and it is closed afterwards so its
# resources are released even when the job fails.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(go(loop))
finally:
    loop.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment