Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Quick parallelized script to delete all objects in a DreamObjects (S3) bucket
import boto
import sys
import time
from multiprocessing import Process, Queue
from Queue import Empty
class Worker(Process):
    """Child process that bulk-deletes batches of keys pulled from a queue.

    Every item taken from ``q`` is handed unchanged to
    ``bucket.delete_keys``. The worker keeps polling until ``finish()`` has
    been called *and* a poll of the queue times out empty, so batches that
    are already queued are still processed before the process exits.

    Args:
        me: Identifier used only in the progress messages.
        q: Queue of key batches; its ``get`` must raise ``Empty`` on timeout.
        bucketname: Name of the bucket to delete from.
        poll_interval: Seconds to block waiting for a batch before checking
            whether a stop was requested.
    """

    def __init__(self, me, q, bucketname, poll_interval=1):
        # Imported locally so this class does not depend on the file header
        # being changed to export Event.
        from multiprocessing import Event

        Process.__init__(self)
        self.me = me
        self.q = q
        self.poll_interval = poll_interval
        # A plain attribute flipped in the parent is never seen by the child
        # process (each has its own copy of this object); an Event is shared
        # between both sides.
        self._stop_requested = Event()
        # NOTE: relies on a module-level ``s3`` connection being defined
        # before any Worker is constructed.
        self.bucket = s3.get_bucket(bucketname, validate=False)

    @property
    def running(self):
        """True until ``finish()`` has been called."""
        return not self._stop_requested.is_set()

    def run(self):
        """Delete queued batches until a stop is requested and the queue is idle."""
        while True:
            try:
                keys = self.q.get(True, self.poll_interval)
            except Empty:
                # Only give up once the producer said it is done; otherwise
                # the queue is just temporarily empty.
                if self._stop_requested.is_set():
                    break
                print('Worker %s waiting...' % (self.me,))
                continue
            print('Worker %s deleting %s objects...' % (self.me, len(keys)))
            self.bucket.delete_keys(keys)
            print('Worker %s done!' % (self.me,))

    def finish(self):
        """Ask the worker to exit once it finds the queue empty."""
        self._stop_requested.set()
def clear_bucket(s3, bucketname, num_workers=10, batch_size=100, max_queued=30):
    """Delete every object in a bucket using a pool of Worker processes.

    Keys are listed in this process, grouped into batches and handed to the
    workers through a shared queue. Once listing is finished and the queue
    has drained, every worker is asked to stop.

    Args:
        s3: Connection object providing ``get_bucket``.
        bucketname: Name of the bucket to empty.
        num_workers: Number of Worker processes to start.
        batch_size: Maximum number of keys placed in one queued batch.
        max_queued: Listing pauses while more than this many batches are
            waiting in the queue.
    """
    bucket = s3.get_bucket(bucketname, validate=False)
    queue = Queue()
    workers = [Worker(i, queue, bucketname) for i in range(num_workers)]
    for worker in workers:
        worker.start()

    def any_alive():
        # Guards the wait loops below against spinning forever when every
        # worker has already died.
        return any(worker.is_alive() for worker in workers)

    batch = []
    for key in bucket.list():
        # Append first, then flush: flushing in an ``else`` branch would
        # silently drop the key that triggered the flush.
        batch.append(key)
        if len(batch) < batch_size:
            continue
        queue.put(batch)
        batch = []
        # Back-pressure so the listing does not run far ahead of deletion.
        while queue.qsize() > max_queued and any_alive():
            print('Queue full...')
            time.sleep(5)

    # Flush the final, partially filled batch.
    if batch:
        queue.put(batch)

    # Let the workers pick up everything that is queued before telling them
    # to stop, so no batch is left behind.
    while queue.qsize() > 0 and any_alive():
        time.sleep(1)
    for worker in workers:
        worker.finish()
if __name__ == '__main__':
    # Guarded so that merely importing this module (including the re-import
    # multiprocessing performs in child processes on some platforms) does
    # not open a connection or start deleting objects.
    # ``s3`` is still bound as a module-level name here because
    # Worker.__init__ looks it up as a global.
    s3 = boto.connect_s3(host='objects-us-west-1.dream.io')
    clear_bucket(s3, 'YOUR-BUCKET-NAME')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment