Skip to content

Instantly share code, notes, and snippets.

@jsonperl
Last active August 29, 2015 13:56
Show Gist options
  • Save jsonperl/9199093 to your computer and use it in GitHub Desktop.
Save jsonperl/9199093 to your computer and use it in GitHub Desktop.
S3 File Deleter, to delete entire buckets, or subsets...
# Use fog to delete all matching files from a bucket, specifically targeting versioned buckets.
#
# Note: you'll want to 'suspend' versioning since deletes would create a ton of
# DeleteMarkers, which you would then have to delete, recursively,
# until the end of time.
#
# Allows for a dry run to see what you're gonna kill before you kill it
# Deletes every version (and delete marker) of the selected objects in a
# bucket, queueing key/version pairs and sending them in batched
# multi-object delete calls.
#
# Talks to S3 through a constant named +S3+ that is defined outside this
# class (presumably a fog AWS storage connection -- confirm in the caller).
class S3FileDeleter
  # Flush threshold: once this many key/version pairs are queued the batch
  # is sent (or, in a dry run, just reported).
  DELETE_BATCH_SIZE = 900

  # Must specifically set dry_run to false as a safeguard against derpy deletes
  #
  # @param bucket [String] name of the bucket to operate on
  # @param dry_run [Boolean] when true (the default) nothing is deleted; the
  #   keys and versions that would be removed are only printed
  def initialize(bucket, dry_run = true)
    @bucket = bucket
    @dry_run = dry_run
    @current_batch = {}
    @delete_count = 0
  end

  # Can use a prefix or regex to specify the files to delete, or blank to
  # delete it all.
  #
  # @param prefix [String, nil] only keys starting with this prefix are listed
  # @param regex [Regexp, String, nil] only keys matching this are selected
  # @return [nil]
  def delete!(prefix = nil, regex = nil)
    get_file_keys(prefix, regex).each do |key|
      get_versions(key).each { |version| delete_file(version) }
    end
    # Flush whatever is left over from the last, partially filled batch.
    run_current_batch if batch_count > 0

    if @dry_run
      puts "Dry run: would have deleted #{@delete_count} files..."
    else
      puts "Deleted #{@delete_count} files..."
    end
  end

  # Lists the keys in the bucket that are selected by +prefix+ and +regex+.
  #
  # @param prefix [String, nil] passed to the directory lookup as :prefix
  # @param regex [Regexp, String, nil] keys must match this when given
  # @return [Array<String>] the selected keys
  def get_file_keys(prefix = nil, regex = nil)
    opts = {}
    opts[:prefix] = prefix if prefix

    keys = []
    # NOTE(review): deliberately iterating with #each instead of
    # #select/#map -- fog collections may page through results inside
    # #each; confirm before changing the iteration method.
    S3.directories.get(@bucket, opts).files.all.each do |file|
      keys << file.key if regex.nil? || file.key.match(regex)
    end
    keys
  end

  # @return [Integer] number of key/version pairs currently queued
  def batch_count
    @current_batch.values.reduce(0) { |total, ids| total + ids.size }
  end

  # Queues one entry from a version listing for deletion and flushes the
  # batch once it has reached DELETE_BATCH_SIZE.
  #
  # @param version [Hash] a listing entry holding either a 'Version' or a
  #   'DeleteMarker' hash with 'Key' and 'VersionId'
  def delete_file(version)
    object = version_entry(version)
    # The literal "null" version id is stored as nil.
    # NOTE(review): how nil is sent to S3 is decided by
    # S3.delete_multiple_objects -- confirm it handles nil entries.
    version_id = object['VersionId'] == "null" ? nil : object['VersionId']
    (@current_batch[object['Key']] ||= []) << version_id

    run_current_batch if batch_count >= DELETE_BATCH_SIZE
  end

  # Prints the queued keys/versions, deletes them unless this is a dry run,
  # adds them to the running total and starts a fresh batch.
  def run_current_batch
    verb = @dry_run ? 'Would' : 'Will'
    @current_batch.each do |key, versions|
      puts "== #{verb} delete key '#{key}'"
      ids = versions.compact
      next if ids.empty?

      puts "Versions:"
      ids.each { |id| puts id }
    end

    unless @dry_run
      opts = { 'versionId' => @current_batch, :quiet => true }
      S3.delete_multiple_objects(@bucket, @current_batch.keys, opts)
    end

    @delete_count += batch_count
    # A new hash (rather than #clear) so the one handed to the delete call
    # above is never mutated afterwards.
    @current_batch = {}
  end

  # Collects all listing entries (versions and delete markers) that belong
  # to exactly +key+, following truncated listings page by page.
  #
  # @param key [String] the object key
  # @return [Array<Hash>] listing entries whose 'Key' equals +key+
  def get_versions(key)
    opts = { 'prefix' => key }
    versions = []

    loop do
      result = S3.get_bucket_object_versions(@bucket, opts).body
      # The listing is a *prefix* query, so it also returns entries for
      # longer keys that merely start with +key+ (e.g. "a.txt.bak" for
      # "a.txt"). Keep exact matches only; otherwise keys that were never
      # selected get deleted and shared-prefix keys are queued twice.
      (result['Versions'] || []).each do |entry|
        object = version_entry(entry)
        versions << entry if object && object['Key'] == key
      end

      break unless result['IsTruncated']

      opts['version-id-marker'] = result['NextVersionIdMarker']
      opts['key-marker'] = result['NextKeyMarker']
    end

    versions
  end

  private

  # Returns the inner hash of a listing entry, whichever of the two wrapper
  # keys ('Version' or 'DeleteMarker') it was stored under.
  def version_entry(version)
    version['Version'] || version['DeleteMarker']
  end
end
module Fog
  module Storage
    class AWS
      class Real
        # This did not work like it says it did, so I hacked it up to work correctly
        #
        # Builds a <Delete> XML document for the given keys and POSTs it to
        # the bucket's ?delete sub-resource.
        #
        # @param bucket_name [String] bucket to delete from
        # @param object_names [Array<String>] keys to delete
        # @param options [Hash] extra request headers, plus two special keys
        #   that are removed before the headers are sent:
        #   * :quiet      -- when truthy, adds <Quiet>true</Quiet>
        #   * 'versionId' -- Hash mapping a key to one version id or an Array
        #     of version ids. A missing key or a nil version id produces a
        #     key-only <Object> element (no <VersionId>).
        # @return whatever #request returns
        def delete_multiple_objects(bucket_name, object_names, options = {})
          headers = options.dup
          quiet = headers.delete(:quiet)
          version_ids = headers.delete('versionId') || {}

          # .dup so appending works even if string literals are frozen.
          data = "<Delete>".dup
          data << "<Quiet>true</Quiet>" if quiet

          object_names.each do |object_name|
            escaped_key = CGI.escapeHTML(object_name)
            # Normalises "absent", a single id and an Array of ids to a list.
            # An absent entry becomes [nil], i.e. one key-only element; an
            # explicitly empty Array yields no element at all.
            [version_ids[object_name]].flatten.each do |version_id|
              data << "<Object><Key>#{escaped_key}</Key>"
              # A nil id cannot be escaped and has no XML form, so the
              # <VersionId> element is simply left out.
              data << "<VersionId>#{CGI.escapeHTML(version_id)}</VersionId>" if version_id
              data << "</Object>"
            end
          end
          data << "</Delete>"

          # Byte count, not character count: keys may contain multi-byte
          # characters.
          headers['Content-Length'] = data.bytesize
          headers['Content-MD5'] = Digest::MD5.base64digest(data)

          request({
            :body => data,
            :expects => 200,
            :headers => headers,
            :bucket_name => bucket_name,
            :method => 'POST',
            :parser => Fog::Parsers::Storage::AWS::DeleteMultipleObjects.new,
            :query => {'delete' => nil}
          })
        end
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment