Last active
August 29, 2015 13:56
-
-
Save jsonperl/9199093 to your computer and use it in GitHub Desktop.
S3 File Deleter, to delete entire buckets, or subsets...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Uses fog to delete all matching files from a bucket, specifically targeting
# versioned buckets: every listed version and delete marker of each matching
# key is queued for deletion.
#
# Note: you'll want to 'suspend' versioning first, since deletes would
# otherwise create a ton of DeleteMarkers, which you would then have to
# delete, recursively, until the end of time.
#
# Allows for a dry run to see what you're gonna kill before you kill it.
#
# Relies on a constant named S3 defined elsewhere; it is called here with
# `directories.get`, `get_bucket_object_versions` and `delete_multiple_objects`.
class S3FileDeleter
  # Number of queued versions that triggers a delete request.
  # NOTE(review): presumably chosen to stay under S3's Multi-Object Delete
  # limit of 1000 entries per request -- confirm.
  DELETE_BATCH_SIZE = 900

  # bucket  - name of the bucket to operate on.
  # dry_run - must be explicitly `false` to really delete, as a safeguard
  #           against derpy deletes. Any other value (including nil) only
  #           reports what would be deleted.
  def initialize(bucket, dry_run = true)
    @bucket = bucket
    @dry_run = dry_run != false
    @current_batch = {}
    @delete_count = 0
  end

  # Deletes (or, in a dry run, reports) every version of every matching file.
  #
  # prefix - optional key prefix handed to the bucket listing.
  # regex  - optional pattern each key must match.
  # Leave both blank to delete it all.
  #
  # Prints a summary line and returns nil.
  def delete!(prefix = nil, regex = nil)
    get_file_keys(prefix, regex).each do |key|
      get_versions(key).each { |version| delete_file(version) }
    end

    # Flush whatever is left over from the last, partially filled batch.
    run_current_batch if batch_count > 0

    verb = @dry_run ? 'Dry run: would have deleted' : 'Deleted'
    puts "#{verb} #{@delete_count} files..."
  end

  # Returns the keys in the bucket that start with `prefix` (when given) and
  # match `regex` (when given).
  #
  # Raises ArgumentError when the bucket lookup returns nil.
  def get_file_keys(prefix = nil, regex = nil)
    opts = {}
    opts[:prefix] = prefix if prefix

    directory = S3.directories.get(@bucket, opts)
    raise ArgumentError, "Bucket '#{@bucket}' could not be found" if directory.nil?

    keys = []
    # NOTE(review): deliberately iterates with `each` and a block rather than
    # map/select; fog's file collection may page through results in `each`
    # -- confirm before changing.
    directory.files.all.each do |file|
      keys << file.key if regex.nil? || file.key.match(regex)
    end
    keys
  end

  # Number of versions (including nil "no version id" entries) queued in the
  # current batch.
  def batch_count
    @current_batch.values.inject(0) { |total, ids| total + ids.size }
  end

  # Queues one entry from a version listing for deletion and flushes the
  # batch once it is full.
  #
  # version - a Hash holding the object details under either 'Version' or
  #           'DeleteMarker'.
  def delete_file(version)
    object = version.fetch('Version') { version['DeleteMarker'] }
    key = object['Key']
    # The literal string "null" is stored as nil, i.e. "no version id".
    version_id = object['VersionId'] == 'null' ? nil : object['VersionId']

    (@current_batch[key] ||= []) << version_id

    run_current_batch if batch_count >= DELETE_BATCH_SIZE
  end

  # Prints the queued keys/versions, issues the delete request unless this is
  # a dry run, then adds the batch to the running total and starts a new batch.
  def run_current_batch
    @current_batch.each do |key, version_ids|
      puts "== #{@dry_run ? 'Would' : 'Will'} delete key '#{key}'"

      named_versions = version_ids.flatten.compact
      next if named_versions.empty?

      puts "Versions:"
      named_versions.each { |id| puts id }
    end

    unless @dry_run
      opts = { 'versionId' => @current_batch, :quiet => true }
      S3.delete_multiple_objects(@bucket, @current_batch.keys, opts)
    end

    @delete_count += batch_count
    # Assign a fresh Hash instead of clearing: the old one was handed to the
    # delete call above.
    @current_batch = {}
  end

  # Returns every version / delete marker entry that belongs to exactly `key`,
  # following truncated (paged) listings to the end.
  #
  # The listing is requested by prefix, so it can also contain other keys that
  # merely start with `key` (e.g. "foobar" for "foo"). Those are filtered out
  # here; otherwise they would be deleted even when they did not match the
  # caller's regex, and queued twice when they did.
  def get_versions(key)
    opts = { 'prefix' => key }
    versions = []

    loop do
      result = S3.get_bucket_object_versions(@bucket, opts).body

      (result['Versions'] || []).each do |entry|
        object = entry['Version'] || entry['DeleteMarker']
        versions << entry if object && object['Key'] == key
      end

      break unless result['IsTruncated']

      # Continue the listing where the previous page stopped.
      opts['version-id-marker'] = result['NextVersionIdMarker']
      opts['key-marker'] = result['NextKeyMarker']
    end

    versions
  end
end
module Fog
  module Storage
    class AWS
      class Real
        # Replacement for fog's delete_multiple_objects: the stock version did
        # not work like it says it did, so this one builds the multi-object
        # delete request itself.
        #
        # bucket_name  - bucket to delete from.
        # object_names - Array of keys to delete.
        # options      - Hash; two entries are consumed here, everything else
        #                is sent along as request headers:
        #                :quiet      - truthy adds <Quiet>true</Quiet>.
        #                'versionId' - Hash of key => version id, or key =>
        #                              Array of version ids. A nil id inside a
        #                              list yields a key-only entry (no
        #                              <VersionId>), the same as a key that has
        #                              no entry at all.
        #
        # Returns whatever `request` returns. The options Hash passed in is
        # not modified.
        def delete_multiple_objects(bucket_name, object_names, options = {})
          headers = options.dup
          quiet = headers.delete(:quiet)
          version_ids = headers.delete('versionId')

          entries = object_names.flat_map do |object_name|
            key_xml = "<Key>#{CGI.escapeHTML(object_name)}</Key>"
            requested = version_ids.nil? ? nil : version_ids[object_name]
            next ["<Object>#{key_xml}</Object>"] unless requested

            [requested].flatten.map do |version_id|
              # CGI.escapeHTML raises on nil, so a nil id must not reach it.
              version_xml = version_id.nil? ? '' : "<VersionId>#{CGI.escapeHTML(version_id)}</VersionId>"
              "<Object>#{key_xml}#{version_xml}</Object>"
            end
          end

          data = "<Delete>#{quiet ? '<Quiet>true</Quiet>' : ''}#{entries.join}</Delete>"

          # Content-Length counts bytes, not characters; the two differ as
          # soon as a key contains a multi-byte character.
          headers['Content-Length'] = data.bytesize
          headers['Content-MD5'] = Digest::MD5.base64digest(data)

          request({
            :body => data,
            :expects => 200,
            :headers => headers,
            :bucket_name => bucket_name,
            :method => 'POST',
            :parser => Fog::Parsers::Storage::AWS::DeleteMultipleObjects.new,
            :query => {'delete' => nil}
          })
        end
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment