Created
February 9, 2017 20:40
-
-
Save hospadar/ab31976120e7477526b142446a3a1ee9 to your computer and use it in GitHub Desktop.
Delete lots of S3 files all at once using bulk requests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
from argparse import ArgumentParser | |
import boto3, time, sys, random | |
import urllib.parse as p | |
# Maximum number of keys sent in one delete_objects request (S3 rejects
# requests that list more than 1000 keys).
batch_size = 1000


def _object_key(uri):
    """Rebuild the S3 object key from a parsed ``s3://bucket/key`` URI."""
    key = uri.path[1:]  # drop the "/" that separates bucket and key
    # urlparse splits "?" and "#" off as query/fragment, but both are ordinary
    # characters in an S3 key; stitch them back on so the intended object is
    # named.  (A bare trailing "?" or "#" still cannot be recovered.)
    query = getattr(uri, "query", "")
    fragment = getattr(uri, "fragment", "")
    if query:
        key += "?" + query
    if fragment:
        key += "#" + fragment
    return key


def rm_files(uri_list, total_num):
    """Delete S3 objects in bulk, batching keys per bucket.

    URIs are consumed lazily and grouped into ``delete_objects`` requests of
    at most ``batch_size`` keys.  A request only ever contains keys from a
    single bucket: when the bucket changes, the pending batch is sent before
    the new URI is queued.  Whatever is still pending when the input is
    exhausted is sent as a final batch, regardless of ``total_num``.

    Args:
        uri_list: iterable of parsed URIs (``urllib.parse.urlparse`` results);
            ``hostname`` is used as the bucket and the path as the key.
        total_num: expected number of URIs; used only for the progress line.

    Progress is written to stdout after every request; keys that S3 reports
    as failed are listed on stderr and excluded from the final count.
    """
    client = boto3.client('s3')
    seen = 0
    deleted = 0
    pending = []

    def flush():
        """Send the pending batch, record the outcome and report progress."""
        nonlocal deleted
        bucket = pending[0].hostname
        resp = client.delete_objects(
            Bucket=bucket,
            Delete={
                'Objects': [{'Key': _object_key(x)} for x in pending]
            }
        )
        # delete_objects reports per-key failures in the response body
        # instead of raising, so they have to be inspected explicitly.
        failures = resp.get('Errors') or []
        for failure in failures:
            sys.stderr.write("\nFailed to delete s3://{0}/{1}: {2} {3}\n".format(
                bucket, failure.get('Key'), failure.get('Code'),
                failure.get('Message')))
        deleted += len(pending) - len(failures)
        # Guard against an empty/zero total so the progress line cannot
        # raise ZeroDivisionError.
        pct = 100. * (float(seen) / float(total_num)) if total_num else 100.
        sys.stdout.write("\r{pct:.2f}% complete ({done:08d}/{total}) .../{last_file}"
                         .format(done=seen, total=total_num, pct=pct,
                                 last_file="/".join(pending[-1].path.split("/")[-3:])))
        del pending[:]

    for uri in uri_list:
        # Flush BEFORE queuing a URI from a different bucket; otherwise its
        # key would be sent in a request addressed to the previous bucket.
        if pending and pending[0].hostname != uri.hostname:
            flush()
        pending.append(uri)
        seen += 1
        if len(pending) >= batch_size:
            flush()

    # Send the trailing partial batch even if total_num was inaccurate.
    if pending:
        flush()

    print()
    print("Deleted {0} objects.".format(deleted))
if __name__ == '__main__':
    # The text must be passed as description=; the first positional parameter
    # of ArgumentParser is prog, which would turn this sentence into the
    # program name shown in the usage line.
    parser = ArgumentParser(
        description="Remove many s3 keys with bulk delete requests")
    parser.add_argument("filename", help="filename containing s3 URIs to delete")
    args = parser.parse_args()

    # First pass: count the URIs.  Blank lines are skipped here because they
    # are also skipped when the URIs are streamed below; counting them would
    # make the total passed to rm_files larger than the number of URIs.
    with open(args.filename) as f:
        total = sum(1 for line in f if line.strip())

    # Second pass: stream the parsed URIs so the whole file is never held in
    # memory at once.
    with open(args.filename) as f:
        rm_files((p.urlparse(x.strip()) for x in f if x.strip()), total)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment