Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jesperalmstrom/8c96d016e3ecfd0d79afccfe9b05ccf9 to your computer and use it in GitHub Desktop.
Save jesperalmstrom/8c96d016e3ecfd0d79afccfe9b05ccf9 to your computer and use it in GitHub Desktop.
Remove deprecated Cost and Usage Report objects from S3 bucket
#!/usr/bin/python
# Usage for an AWS test profile:
# python scriptname.py test | less
import boto3
import botocore.session
import argparse
import json
# define some colors
class bcolors:
    """ANSI escape sequences used to colour the script's terminal output.

    Wrap a message as ``f'{bcolors.OKGREEN}text{bcolors.ENDC}'``; ``ENDC``
    resets the terminal attributes so later output is not affected.
    """

    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'  # reset all attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
def _parse_bool(value):
    """Convert a command-line string such as 'True' or 'False' to a bool.

    argparse's ``type=bool`` cannot be used for this: ``bool('False')`` is
    ``True`` because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected True or False, got {value!r}')


parser = argparse.ArgumentParser()
parser.add_argument("profile", type=str, help="The aws profile to use")
parser.add_argument('--dryrun', type=_parse_bool, default=True, help="Enter --dryrun False to accutally delete cur objects")
args = parser.parse_args()
# True (the default) means "only report"; objects are deleted only when the
# caller explicitly passes --dryrun False.
dryrun = args.dryrun
# Set your profile name on a low-level Botocore session
boto3.setup_default_session(profile_name=args.profile)
cur_bucket = 'bucket.aws.cost.report'  # Replace with your bucket
cur_prefix = 'cur/Cost_and_Usage_report/'  # Replace with cur prefix
manifest_file = 'Cost_and_Usage_report-Manifest.json'  # Name of the manifest file that reside in the current monthly data
s3 = boto3.resource('s3')  # not used by the code below; kept as a module-level name
s3client = boto3.client('s3')
paginator = s3client.get_paginator('list_objects_v2')
# Walk every "folder" directly below cur_prefix. Each one is expected to hold
# a manifest whose 'assemblyId' names the only sub-folder worth keeping.
for result in paginator.paginate(Bucket=cur_bucket, Prefix=cur_prefix, Delimiter='/'):
    # 'CommonPrefixes' is missing from the page when there are no sub-folders.
    for prefix in result.get('CommonPrefixes', []):
        folder_prefix = prefix.get('Prefix')
        manifest_obj = folder_prefix + manifest_file
        print(f'{bcolors.OKGREEN}{bcolors.BOLD}Try to find manifest - {manifest_obj}{bcolors.ENDC}')
        try:
            response = s3client.get_object(Bucket=cur_bucket, Key=manifest_obj)
            manifest_file_content = response['Body'].read().decode()
            manifest_dict = json.loads(manifest_file_content)
            folder_to_keep = manifest_dict['assemblyId']
            print(f'{bcolors.OKGREEN} This is the folder to keep {folder_to_keep}{bcolors.ENDC}')
            # Paginate the listing: a single list call returns at most 1000
            # keys, which would silently leave older objects behind.
            for page in paginator.paginate(Bucket=cur_bucket, Prefix=folder_prefix):
                # 'Contents' is missing from the page when nothing matches.
                for f in page.get('Contents', []):
                    filename = f['Key']
                    # Substring test on purpose: besides the manifest itself it
                    # also spares any key that is a leading part of the manifest
                    # key (e.g. a folder placeholder object).
                    if folder_to_keep not in filename and filename not in manifest_obj:
                        if not dryrun:
                            print(f'{bcolors.OKBLUE}Removing file {filename}{bcolors.ENDC}')
                            # WARNING this is the point of no return
                            s3client.delete_object(Bucket=cur_bucket, Key=filename)
                        else:
                            print(f'{bcolors.OKGREEN}Dry run: {filename} not deleted{bcolors.ENDC}')
                    else:
                        print(f"{bcolors.WARNING}NOT deleting {filename}{bcolors.ENDC}")
        except Exception as e:
            # Best effort per folder: report the problem (missing manifest,
            # bad JSON, AWS error, ...) and carry on with the next folder.
            print(bcolors.FAIL + str(e) + bcolors.ENDC)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment