Skip to content

Instantly share code, notes, and snippets.

@hospadar
Created February 9, 2017 20:40
Show Gist options
  • Save hospadar/ab31976120e7477526b142446a3a1ee9 to your computer and use it in GitHub Desktop.
Save hospadar/ab31976120e7477526b142446a3a1ee9 to your computer and use it in GitHub Desktop.
Delete lots of s3 files all at once using bulk requests
#!/usr/bin/env python3
from argparse import ArgumentParser
import boto3, time, sys, random
import urllib.parse as p
# S3's bulk DeleteObjects call accepts at most 1000 keys per request.
batch_size = 1000


def _delete_batch(client, uris):
    """Bulk-delete *uris* (all from one bucket); return how many succeeded."""
    bucket = uris[0].hostname
    resp = client.delete_objects(
        Bucket=bucket,
        Delete={
            # The URI path starts with "/"; the object key does not.
            # NOTE(review): urlparse splits "?query" and "#fragment" off the
            # path, so a key containing those characters would be truncated
            # here -- confirm the input never contains such keys.
            'Objects': [{'Key': x.path[1:]} for x in uris]
        }
    )
    # Per-key failures come back in the response body instead of being raised.
    errors = (resp or {}).get('Errors') or []
    for err in errors:
        sys.stderr.write("\nFailed to delete s3://{0}/{1}: {2} {3}\n".format(
            bucket, err.get('Key'), err.get('Code'), err.get('Message')))
    return len(uris) - len(errors)


def _report_progress(done, total_num, uri):
    """Overwrite the current terminal line with a progress summary."""
    # Guard against a zero total so an empty/miscounted input cannot crash.
    pct = 100. * (float(done) / float(total_num)) if total_num else 0.
    sys.stdout.write(
        "\r{pct:.2f}% complete ({done:08d}/{total}) .../{last_file}".format(
            done=done, total=total_num, pct=pct,
            last_file="/".join(uri.path.split("/")[-3:])))


def rm_files(uri_list, total_num, client=None):
    """Delete many S3 objects using bulk delete requests.

    Args:
        uri_list: Iterable of parsed S3 URIs (objects exposing ``hostname``
            for the bucket and ``path`` for the key, such as the results of
            ``urllib.parse.urlparse``). It is consumed lazily.
        total_num: Expected number of URIs. Used only for the progress
            display; the final batch is flushed regardless of its value.
        client: Optional object providing ``delete_objects``. Defaults to
            ``boto3.client('s3')``.
    """
    if client is None:
        client = boto3.client('s3')
    c = 0
    deleted = 0
    to_delete = []
    for uri in uri_list:
        # A request targets exactly one bucket, so flush the pending batch
        # *before* queueing a URI that belongs to a different bucket.
        if to_delete and to_delete[0].hostname != uri.hostname:
            deleted += _delete_batch(client, to_delete)
            _report_progress(c, total_num, to_delete[-1])
            to_delete = []
        to_delete.append(uri)
        c += 1
        if len(to_delete) >= batch_size:
            deleted += _delete_batch(client, to_delete)
            _report_progress(c, total_num, uri)
            to_delete = []
    # Flush whatever is left; do not rely on total_num matching the input.
    if to_delete:
        deleted += _delete_batch(client, to_delete)
        _report_progress(c, total_num, to_delete[-1])
    print()
    print("Deleted {0} objects.".format(deleted))
if __name__ == '__main__':
    # Pass the text as the description; the first positional argument of
    # ArgumentParser is the program name shown in the usage line.
    parser = ArgumentParser(
        description="Remove many s3 keys with bulk delete requests")
    parser.add_argument("filename", help="filename containing s3 URIs to delete")
    o = parser.parse_args()
    # First pass: count only the non-blank lines, because those are the only
    # ones turned into URIs below; the total must match what is processed.
    with open(o.filename) as f:
        c = sum(1 for line in f if line.strip())
    # Second pass: stream the URIs so the whole file is never held in memory.
    with open(o.filename) as f:
        rm_files((p.urlparse(x.strip()) for x in f if x.strip()), c)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment