Fast mass deletion on S3 for Node

Description

This script is conceptually based on s3nuke but with an entirely different implementation. The key differences are:

  • Uses deleteObjects to delete in batches rather than issuing a separate delete call for each key (see the sketch below).
  • Is implemented in Node to take advantage of its evented IO.
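
For reference, a single deleteObjects request can remove up to 1,000 keys (an S3 hard limit), where per-key deletion would need 1,000 round trips. A minimal sketch of the batched call, assuming client is an AWS.S3 instance and keys is an array of key strings (my-bucket is a placeholder):

# Batch-delete up to 1,000 keys in one request.
params =
  Bucket: 'my-bucket'
  Delete:
    Objects: ({Key: key} for key in keys)  # [{Key: 'a'}, {Key: 'b'}, ...]
client.deleteObjects params, (err, result) ->
  throw err if err
  console.log "Deleted #{result.Deleted.length} objects"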

Performance

On my office connection I can delete about 50,000 objects per minute. Since each batch is just two requests (one to list the keys and one to delete them), the script deletes objects about as fast as it can discover them. As a result the completion percentage hovers around 100%, making the progress bar and remaining-time estimate somewhat meaningless.

Configuration

The following environment variables provide the AWS credentials and identify the bucket to operate on:

  • AWS_ACCESS_KEY_ID
  • AWS_SECRET_ACCESS_KEY
  • S3_BUCKET

In addition, the script takes an optional command line argument that limits the deletion to keys with the given prefix. To make the script easier to run, you can put the environment variables in a .env file using the standard dotenv convention.
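
For example, a .env file might look like this (values are placeholders):

AWS_ACCESS_KEY_ID=your-access-key-id
AWS_SECRET_ACCESS_KEY=your-secret-access-key
S3_BUCKET=my-bucket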

Installation

Copy the files in this gist to a directory and run:

npm install

This will download and install the dependencies. Next, compile the CoffeeScript to JavaScript:

coffee -c s3nuke-node.coffee

This assumes you have npm, node and CoffeeScript installed.
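
If the coffee command is missing, the compiler can be installed globally through npm (the package was named coffee-script at the time of this gist):

npm install -g coffee-script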

Usage

Once installed, set up a .env file with your credentials and the bucket you want to clear, then run:

node s3nuke-node.js my-folder

The argument is optional. Leave it off to delete the entire bucket contents. On some systems the command will be nodejs instead of node.

Random Errors

Occasionally S3 returns a transient error. This is normal; it will kill the script, but you can simply restart it and it will pick up where it left off.
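
If restarts get tedious, a bounded shell retry loop is one workaround; a rough sketch (bash, with my-folder as a placeholder prefix):

for attempt in 1 2 3; do
  node s3nuke-node.js my-folder && break
done

Note the script also exits non-zero once nothing is left to delete, so an unbounded until-loop would never terminate.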

package.json

{
  "name": "s3nuke",
  "description": "A script to mass delete files on S3",
  "author": "Eric Anderson <eric@pixelwareinc.com>",
  "dependencies": {
    "dotenv": ">= 0",
    "aws-sdk": ">= 0",
    "pace": ">= 0"
  }
}
s3nuke-node.coffee

require('dotenv').load()

AWS = require 'aws-sdk'
client = new AWS.S3

bucket = process.env.S3_BUCKET
prefix = process.argv[2] or null

opener = if prefix
  "Removing all files starting with #{prefix} from #{bucket}"
else
  "Removing all files in #{bucket}"
console.log opener

progress = require('pace') 1
deleted = 0

die = (msg) ->
  console.error msg
  process.exit 1

# List one batch of keys (up to 1000) and issue a single deleteObjects
# call for it, kicking off the next listing in parallel.
next = (marker=null) ->
  query =
    Bucket: bucket
    Marker: marker
    Prefix: prefix
  client.listObjects query, (err, list) ->
    die err if err
    die 'No files found' if list.Contents.length == 0

    progress.total += list.Contents.length
    keys = (Key: item.Key for item in list.Contents)
    cmd =
      Bucket: bucket
      Delete:
        Objects: keys

    # To break the stack and give Node a chance to do other things
    process.nextTick ->
      if list.IsTruncated
        # NextMarker is only returned when a Delimiter is given, so
        # fall back to the last key of this batch.
        next list.NextMarker or keys[keys.length - 1].Key
      else
        progress.op()

    client.deleteObjects cmd, (err, result) ->
      die err if err
      deleted += result.Deleted.length
      progress.op deleted

next()