Skip to content

Instantly share code, notes, and snippets.

@toadkicker
Created October 18, 2021 14:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save toadkicker/a0c40ab44b2702b82af3a2cdff0f7b22 to your computer and use it in GitHub Desktop.
Save toadkicker/a0c40ab44b2702b82af3a2cdff0f7b22 to your computer and use it in GitHub Desktop.
Recursive S3 download

S3 download all my things

This is a rough cut of a script to download everything from the buckets of an S3-compatible object store. I wrote it to move files from DigitalOcean Spaces to AWS proper.

import multiprocessing
import os
import sys
import boto3
from botocore.client import Config
# Shared S3 client. Filled in by initialize(), which the script runs in the
# parent process and again as the initializer of every pool worker.
s3_client = None
# Names of all buckets ("spaces") returned by list_buckets() for the client.
spaces = None


def initialize():
    """Build the module-wide S3 client and record the visible bucket names.

    Rebinds the ``s3_client`` and ``spaces`` globals; returns nothing.
    """
    global s3_client, spaces

    # Connection details for DigitalOcean Spaces. This is just a plain S3
    # client underneath, so ymmv with other endpoints.
    connection = {
        'region_name': 'nyc3',
        'endpoint_url': 'https://nyc3.digitaloceanspaces.com',
        'aws_access_key_id': '<ENTER ACCESS KEY>',
        'aws_secret_access_key': '<ENTER SECRET KEY>',
    }
    s3_client = boto3.session.Session().client('s3', **connection)

    # Collect the names locally first so ``spaces`` is only rebound once the
    # whole listing has been read.
    bucket_names = []
    for bucket in s3_client.list_buckets()['Buckets']:
        bucket_names.append(bucket['Name'])
    spaces = bucket_names
def listObjects(bucket, client=None):
    """Return the complete object listing for ``bucket``.

    A single ``list_objects`` call only returns the first page of keys, so
    larger buckets were silently truncated. This walks every page and merges
    the results into one response-shaped dict.

    Args:
        bucket: Name of the bucket to list.
        client: Optional S3 client to use instead of the module-level
            ``s3_client``.

    Returns:
        A dict holding the first page's fields, with ``'Contents'`` replaced
        by the objects from all pages. ``'Contents'`` is always present and is
        an empty list for an empty bucket.
    """
    client = s3_client if client is None else client
    paginator = client.get_paginator('list_objects')

    merged = None
    contents = []
    for page in paginator.paginate(Bucket=bucket):
        if merged is None:
            # Keep the first page's other fields so the result still looks
            # like a regular list_objects response.
            merged = dict(page)
        # Pages for an empty bucket carry no 'Contents' key at all.
        contents.extend(page.get('Contents', []))

    if merged is None:
        merged = {}
    merged['Contents'] = contents
    # The merged listing is complete, so drop the resume hints.
    merged['IsTruncated'] = False
    merged.pop('NextMarker', None)
    return merged
def _localPath(key):
    """Map an object key to a path beneath the current working directory."""
    # A key may start with '/'; os.path.join would then discard the working
    # directory and write at the filesystem root, so strip leading slashes.
    return os.path.join(os.getcwd(), os.path.normpath(key.lstrip('/')))


def _isDirectoryMarker(key):
    """Return True for "folder" placeholder keys, i.e. keys ending in '/'."""
    return key.endswith('/')


def downloadFile(s3Metadata, bucket=None, client=None):
    """Download one object to the matching path under the working directory.

    Args:
        s3Metadata: One entry of a listing's ``'Contents'``; only its
            ``'Key'`` is read.
        bucket: Bucket that holds the object. Defaults to ``spaces[0]``,
            which is what this function always used before.
        client: Optional S3 client to use instead of the module-level
            ``s3_client``.
    """
    key = s3Metadata['Key']
    path = _localPath(key)

    if _isDirectoryMarker(key):
        # Opening a folder placeholder as a file raises IsADirectoryError;
        # there is nothing to download, so just make sure the folder exists.
        os.makedirs(path, exist_ok=True)
        return

    bucket = spaces[0] if bucket is None else bucket
    client = s3_client if client is None else client

    # Create the parent here too so a download does not rely on createDirs()
    # having run first.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as fh:
        client.download_fileobj(bucket, key, fh)
    print("Downloaded %s" % path, '\n')


def createDirs(files_response):
    """Create every local directory needed by the objects in a listing.

    Args:
        files_response: A listing dict. A missing ``'Contents'`` key (empty
            bucket) is treated as "no objects".
    """
    for entry in files_response.get('Contents', []):
        key = entry['Key']
        path = _localPath(key)
        # A folder placeholder names the directory itself; any other key
        # needs its parent directory.
        target = path if _isDirectoryMarker(key) else os.path.dirname(path)
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(target, exist_ok=True)


if __name__ == '__main__':
    initialize()
    # Each worker process runs initialize() to build its own client.
    pool = multiprocessing.Pool(multiprocessing.cpu_count(), initialize)
    try:
        for space in spaces:
            files_response = listObjects(space)
            createDirs(files_response)
            # Pair every object with the bucket it came from; previously all
            # downloads were attempted against the first bucket only.
            jobs = [(obj, space) for obj in files_response.get('Contents', [])]
            pool.starmap(downloadFile, jobs)
    finally:
        # Shut the pool down even if a listing or download raised.
        pool.close()
        pool.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment