Skip to content

Instantly share code, notes, and snippets.

@koma77
Created February 7, 2019 05:41
Show Gist options
  • Save koma77/0ac15d6ec000076a4ef047d46a15963c to your computer and use it in GitHub Desktop.
Save koma77/0ac15d6ec000076a4ef047d46a15963c to your computer and use it in GitHub Desktop.
s3 versions delete
import random
from queue import Queue
from threading import Thread
import boto
# A thread that produces data
def producer(out_q, num_threads):
    """List every object version in the bucket and queue them for deletion.

    Versions returned by ``bucket.list_versions()`` are grouped into batches
    of 1000 and put on ``out_q`` as dicts of the form
    ``{'num': <batch index>, 'keys': [<version>, ...]}``. A final, smaller
    batch is queued for whatever remains once listing finishes. Afterwards
    one ``None`` sentinel per worker is queued so that each consumer stops.

    Args:
        out_q: Queue the batches and the stop sentinels are written to.
        num_threads: Number of consumer threads reading from ``out_q``;
            exactly this many ``None`` sentinels are queued.
    """
    print("Producer started")
    chunk_counter = 0  # this is simply a nice to have
    keys = []
    try:
        s3 = boto.connect_s3()
        bucket = s3.get_bucket("somebucket")
        for key in bucket.list_versions():
            keys.append(key)
            if len(keys) >= 1000:
                out_q.put({'num': chunk_counter, 'keys': keys})
                chunk_counter += 1
                keys = []  # start a fresh list; the queued one now belongs to a consumer
                print("Sent another 1000 done.... {n} chunks so far".format(n=chunk_counter))
        # Flush the remainder: without this, anything after the last full
        # batch would never be queued and therefore never deleted.
        if keys:
            out_q.put({'num': chunk_counter, 'keys': keys})
            chunk_counter += 1
            print("Sent final {k} keys.... {n} chunks so far".format(k=len(keys), n=chunk_counter))
    finally:
        # stop workers -- done in ``finally`` so consumers are released even
        # if connecting or listing fails, and sent on ``out_q`` (the queue
        # this function was given) rather than on a module-level global.
        for _ in range(num_threads):
            out_q.put(None)
# A thread that consumes data
def consumer(in_q):
    """Delete batches of keys taken from ``in_q`` until told to stop.

    Each queue item is expected to be a dict with a ``'keys'`` entry (passed
    to ``bucket.delete_keys``) and a ``'num'`` entry (used only in the
    progress message). A ``None`` item ends the loop.

    Args:
        in_q: Queue to read batches (and the ``None`` stop marker) from.
    """
    print("Consumer started")
    # Every worker opens its own connection and bucket handle.
    bucket = boto.connect_s3().get_bucket("somebucket")
    # iter(callable, sentinel) keeps calling in_q.get() and stops as soon
    # as it returns None.
    for job in iter(in_q.get, None):
        bucket.delete_keys(job['keys'])
        print("Batch {} deleted".format(job['num']))
# gogogo
num_worker_threads = 20
q = Queue()

# Build the pool of consumers, then start them so they are waiting on the
# queue before any batch is produced.
threads = [
    Thread(target=consumer, args=(q,))
    for _ in range(num_worker_threads)
]
for t in threads:
    t.start()

# A single producer feeds the shared queue.
tp = Thread(target=producer, args=(q, num_worker_threads))
tp.start()

# Wait for every consumer to finish.
for t in threads:
    t.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment