Skip to content

Instantly share code, notes, and snippets.

@jlecour
Created January 10, 2012 14:21
Show Gist options
  • Save jlecour/1589319 to your computer and use it in GitHub Desktop.
Save jlecour/1589319 to your computer and use it in GitHub Desktop.
s3nukem mirror
#!/usr/bin/env ruby
# s3nukem
#
# original by: Stephen Eley (sfeley@gmail.com)
# improved by: Robert LaThanh
# improved again by: Ben Hathaway
#
# A script to delete Amazon S3 buckets (or folders within buckets) with many objects (millions) quickly by
# using multiple threads to retrieve and delete the individual objects.
#
# https://github.com/hathaway/s3nukem
#
# Licensed under the Apache License v2.0 (http://apache.org/licenses)
require 'rubygems'
require 'optparse'
require 'logger'
require 'thread'
begin
require 'right_aws'
rescue LoadError
puts "Missing the RightAWS gem! Try: sudo gem install right_aws"
puts "or for Ruby 1.9: sudo gem install dmarkow-right_aws --source http://gems.github.com"
end
access_key = nil
secret_key = nil
thread_count = 10
max_queue = 10 * 1000
path = ''
STDOUT.sync = true
buckets = []
# Parse the command line
begin
opts = OptionParser.new
opts.banner = "Usage: #{$PROGRAM_NAME} [options] buckets..."
opts.separator ''
opts.separator 'Options:'
opts.on('-a',
'--access ACCESS',
String,
'Amazon Access Key (required)') {|key| access_key = key}
opts.on('-s',
'--secret SECRET',
String,
'Amazon Secret Key (required)') {|key| secret_key = key}
opts.on('-p',
'--path PATH',
String,
'Path of folder to delete (defaults to deleting entire bucket)') do |val|
path = val
end
opts.on('-t',
'--threads COUNT',
Integer,
"Number of simultaneous threads (default #{thread_count})") do |val|
thread_count = val
max_queue = val * 1000
end
opts.on('-h', '--help', 'Show this message') do
puts opts
exit
end
opts.separator ''
# opts.separator 'If the --access and --secret options are not used, the environment variables'
# opts.separator 'AMAZON_ACCESS_KEY_ID and AMAZON_SECRET_ACCESS_KEY must be set.'
opts.separator ''
buckets = opts.parse(ARGV)
if buckets.empty?
puts "You must specify at least one bucket to delete!"
puts opts
exit
end
unless access_key and secret_key
puts "The --access and --secret options are required!"
puts opts
exit
end
rescue OptionParser::ParseError
puts "Oops... #{$!}"
puts opts
exit
end
# Make a connection for bucket deletion
log = Logger.new(STDOUT)
log.level = Logger::ERROR
mains3 = RightAws::S3Interface.new(access_key, secret_key, :multi_thread => true, :port => 80, :protocol => 'http', :logger => log)
begin
mains3.list_all_my_buckets # Confirm credentials
rescue RightAws::AwsError => e
puts e.message
puts opts
exit
end
buckets.each do |bucket|
puts "DELETING: #{bucket} with #{access_key} and #{secret_key} in the path #{path}"
# Thread management
threads = []
queue = Queue.new
mutex_total = Mutex.new
# Tracking variables
total_listed = 0
total_deleted = 0
# Key retrieval thread
threads << Thread.new do
Thread.current[:number] = "CATALOG"
puts "Starting catalog thread..."
s3 = RightAws::S3Interface.new(access_key, secret_key, :multi_thread => true, :port => 80, :protocol => 'http', :logger => log)
prefix = ''
begin
while queue.length > max_queue
sleep 1
end
keys = s3.list_bucket(bucket, :marker => prefix, :prefix => path)
prefix = keys.last[:key] unless keys.empty?
keys.each do |key|
queue.enq(key[:key])
total_listed += 1
end
end until keys.empty?
thread_count.times {queue.enq(:END_OF_BUCKET)}
end
start_time = Time.now
thread_count.times do |count|
threads << Thread.new(count) do |number|
Thread.current[:number] = number
puts "Starting deletion thread #{number}..."
s3 = RightAws::S3Interface.new(access_key, secret_key, :multi_thread => true, :port => 80, :protocol => 'http', :logger => log)
begin
key = queue.deq
unless key == :END_OF_BUCKET
s3.delete(bucket, key)
mutex_total.synchronize {total_deleted += 1}
if (rand(1000) == 1)
puts "Deleting #{key}"
elapsed = Time.now - start_time
puts "#{elapsed}: Deleted #{total_deleted} out of #{total_listed}"
end
end
end until (key == :END_OF_BUCKET)
end
end
threads.each do |t|
begin
t.join
rescue RuntimeError => e
puts "Failure on thread #{t[:number]}: #{e.message}"
end
end
# Clean up any stragglers and kill the bucket
if path == ''
puts "Bucket #{bucket} deleted!" if mains3.force_delete_bucket(bucket)
else
puts "Path #{path} in bucket #{bucket} deleted!"
end
puts
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment