#!/usr/bin/env python3
import boto3
import sys
from botocore.exceptions import ClientError
def check_args():
    if len(sys.argv) < 4:
        print(f'Usage: {sys.argv[0]} profile-name region-name bucket-name [folder-name]')
        exit()
    elif len(sys.argv) == 5:
        return sys.argv[4]
    elif len(sys.argv) == 4:
        return 0
def connect_aws(vProfile, vRegion, vService):
    try:
        boto3.setup_default_session(profile_name=vProfile, region_name=vRegion)
        worker = boto3.client(vService)
        return worker
    except ClientError as e:
        print(e)
#
# MAIN STARTS HERE
#
if __name__ == '__main__':
    # CHECK NUMBER OF ARGS
    gFolderName = check_args()
    # SET VARS
    gProfile = sys.argv[1]
    gRegion = sys.argv[2]
    gBucketName = sys.argv[3]
    gCount = 0
    gSubCount = 0
    try:
        # CONNECT TO AWS
        worker_s3 = connect_aws(gProfile, gRegion, 's3')
    except ClientError as e:
        print(e)
    # LIST OBJECTS UNDER S3 BUCKET/FOLDER
    paginator = worker_s3.get_paginator("list_objects_v2")
    if gFolderName == 0:
        response = paginator.paginate(Bucket=gBucketName)
    else:
        response = paginator.paginate(Bucket=gBucketName, Prefix=gFolderName)
    for page in response:
        print("Listing files")
        # "Contents" is absent from a page with no matching objects, so fall back to an empty list
        files = page.get("Contents", [])
        for file in files:
            gCount += 1
            print(f"File: {file['Key']}, Size: {file['Size']}, StorageClass: {file['StorageClass']}")
            # OPTIONAL NOTIFICATION PER 1M FILES
            # gSubCount += 1
            # if gSubCount == 1000000:
            #     print(f"Total number of object count so far : {gCount}")
            #     gSubCount = 0
    print(f"Total Number of Objects: {gCount}")
ffturan commented Sep 6, 2022

Counting 105,560,365 files took 438 minutes (7.3 hours).
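For context, ListObjectsV2 returns at most 1,000 keys per page, so a count of 105,560,365 objects implies on the order of 105,000 sequential page requests; spread over 438 minutes (~26,280 seconds), that works out to roughly 4 pages per second.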
