Skip to content

Instantly share code, notes, and snippets.

@joshuarobinson
Created November 26, 2020 11:43
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save joshuarobinson/ecf4f82e5d935f841b94b8cccae7c990 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
"""Count all objects in an S3 bucket by partitioning the keyspace into
fixed-length lowercase prefixes and listing each partition in parallel
across a process pool."""
import asyncio
import boto3
import itertools
import string
import sys
import concurrent.futures

# Hard-coded Data VIP to connect to (S3 endpoint address).
FB_DATAVIP = '10.62.116.100'
# Partition the keyspace by every lowercase prefix of this length
# (26**PREFIXDEPTH partitions, i.e. 676 at depth 2).
PREFIXDEPTH = 2
# Number of worker processes issuing list requests concurrently.
WORKERCOUNT = 40

if len(sys.argv) != 2:
    # Usage errors go to stderr so stdout carries only the final count.
    print("Usage: {} bucketname".format(sys.argv[0]), file=sys.stderr)
    sys.exit(1)
bucketname = sys.argv[1]
def count_with_prefix(prefix):
    """Return the number of objects in `bucketname` whose keys start with `prefix`.

    Runs inside a worker process, so it builds its own S3 client —
    boto3 clients cannot be shared across process boundaries.
    """
    s3 = boto3.client('s3', use_ssl=False,
                      endpoint_url='http://' + FB_DATAVIP)
    paginator = s3.get_paginator("list_objects_v2")
    count = 0
    for page in paginator.paginate(Bucket=bucketname, Prefix=prefix):
        # A page with no "Contents" key is simply empty. The original
        # `except KeyError: break` stopped paginating entirely at the
        # first such page, which could undercount if an empty page were
        # ever followed by non-empty ones; .get() keeps iterating.
        count += len(page.get("Contents", []))
    return count
async def count_all_keys(executor):
    """Fan one count_with_prefix job per keyspace partition out to
    `executor` and return the total object count.

    Raises whatever exception the first failing worker raised (the
    original asyncio.wait version left exceptions buried in the futures).
    """
    # get_running_loop is the supported call from inside a coroutine;
    # get_event_loop here is deprecated (3.10+).
    loop = asyncio.get_running_loop()
    prefixes = itertools.product(string.ascii_lowercase, repeat=PREFIXDEPTH)
    # No need to materialize the product into a list — just iterate it.
    tasks = [loop.run_in_executor(executor, count_with_prefix, ''.join(p))
             for p in prefixes]
    # gather awaits everything, preserves order, and propagates the first
    # worker exception instead of silently swallowing it.
    return sum(await asyncio.gather(*tasks))
if __name__ == '__main__':
    # Listing is network-bound, but boto3's response parsing is CPU-heavy
    # enough at this fan-out that a process pool is used over threads.
    # The `with` block guarantees the pool is shut down on exit — the
    # original never called shutdown().
    with concurrent.futures.ProcessPoolExecutor(max_workers=WORKERCOUNT) as executor:
        # asyncio.run (3.7+) creates and tears down the event loop,
        # replacing the deprecated get_event_loop/run_until_complete pair.
        count = asyncio.run(count_all_keys(executor))
    print(count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment