Skip to content

Instantly share code, notes, and snippets.

@joshuarobinson
Created February 8, 2022 09:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joshuarobinson/9086e6cb484407bd56424fb3bfdba7c5 to your computer and use it in GitHub Desktop.
Save joshuarobinson/9086e6cb484407bd56424fb3bfdba7c5 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import multiprocessing
import os
import sys

import boto3
# Host address used to build the S3 endpoint URL for every client in this script.
FB_DATAVIP = '10.62.64.200'

# Credentials are read from the environment; each is None when its variable is unset.
AWS_KEY = os.getenv('AWS_KEY')
AWS_SECRET = os.getenv('AWS_SECRET')

# Per-process S3 handle. It stays None until initialize() assigns it in the
# process that calls it.
s3_client = None
def initialize():
    """Create this process's S3 resource and store it in the global ``s3_client``.

    Passed to ``multiprocessing.Pool`` as the worker initializer, so each
    worker process builds its own resource instead of sharing one.
    """
    global s3_client
    endpoint = 'http://' + FB_DATAVIP
    s3_client = boto3.resource(
        's3',
        endpoint_url=endpoint,
        use_ssl=False,
        aws_access_key_id=AWS_KEY,
        aws_secret_access_key=AWS_SECRET,
    )
def check_for_custom_metadata(bucketname: str, key: str) -> None:
    """Print *key* and its user metadata if the object has any.

    Issues a HEAD request through the module-level ``s3_client`` (assigned by
    ``initialize()``) and reads the ``Metadata`` field of the response.
    Objects whose metadata is empty produce no output.

    Args:
        bucketname: Bucket that holds the object.
        key: Key of the object to inspect.
    """
    response = s3_client.meta.client.head_object(Bucket=bucketname, Key=key)
    metadata = response['Metadata']
    if metadata:
        # One flushed print per object keeps the key and its metadata adjacent
        # when several pool workers write to the same stdout; the bytes written
        # are the same as two consecutive print() calls.
        print("{}\n{}".format(key, metadata), flush=True)
def _report_failure(key, exc):
    """Report a failed metadata check for *key* on stderr."""
    print("Failed to check {}: {!r}".format(key, exc), file=sys.stderr)


if __name__ == '__main__':
    # Script entry point: list every object in the bucket given on the command
    # line and fan the per-object metadata checks out to a process pool.
    if len(sys.argv) != 2:
        print("Usage: {} bucketname".format(sys.argv[0]))
        sys.exit(1)

    bucketname = sys.argv[1]
    prefix = ""
    print("Listing {}/{}".format(bucketname, prefix))

    # Use the same explicit credentials as the workers (see initialize()), so
    # listing and HEAD requests authenticate identically. Values of None fall
    # back to boto3's default credential lookup, as before.
    s3 = boto3.resource('s3',
                        aws_access_key_id=AWS_KEY,
                        aws_secret_access_key=AWS_SECRET,
                        use_ssl=False, endpoint_url='http://' + FB_DATAVIP)
    kwargs = {'Bucket': bucketname, 'Prefix': prefix}

    # initialize() runs once in each worker to build its own S3 resource.
    p = multiprocessing.Pool(2 * multiprocessing.cpu_count(), initialize)

    while True:
        objlist = s3.meta.client.list_objects_v2(**kwargs)
        # 'Contents' is read with a default because a page may not carry it.
        for obj in objlist.get('Contents', []):
            k = obj['Key']
            # Without an error_callback, an exception raised in a worker is
            # dropped because the AsyncResult is never inspected. The key is
            # bound as a default argument so each callback reports its own key.
            p.apply_async(
                check_for_custom_metadata,
                (bucketname, k),
                error_callback=lambda exc, key=k: _report_failure(key, exc),
            )

        # Keep requesting pages until the response has no continuation token.
        token = objlist.get('NextContinuationToken')
        if token is None:
            break
        kwargs['ContinuationToken'] = token

    # Stop accepting work, then wait for every queued check to finish.
    p.close()
    p.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment