Skip to content

Instantly share code, notes, and snippets.

@madssj
Last active December 26, 2015 08:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save madssj/7119791 to your computer and use it in GitHub Desktop.
Save madssj/7119791 to your computer and use it in GitHub Desktop.
Restores keys from Amazon Glacier based on a given bucket and prefix. Uses multiprocessing to speed up marking keys as restored.
import sys
import boto
import multiprocessing

# Number of worker processes issuing restore requests in parallel.
NUM_PROCS = 16

# Validate arguments BEFORE indexing sys.argv. The original code read
# sys.argv[1] / sys.argv[2] first, so a missing argument raised IndexError
# and the usage message below was unreachable.
if len(sys.argv) < 3 or not (sys.argv[1] and sys.argv[2]):
    sys.stderr.write(
        "Usage: {0} <bucket> <prefix> [<days>]\n".format(sys.argv[0])
    )
    sys.exit(1)

bucket_name = sys.argv[1]
prefix = sys.argv[2]

# Optional third argument: number of days to keep the restored copy
# available (defaults to 7).
try:
    days = int(sys.argv[3])
except IndexError:
    days = 7

try:
    conn = boto.connect_s3()
except boto.exception.NoAuthHandlerFound:
    sys.stderr.write(
        "Error: boto failed to connect to s3, check AWS_ACCESS_KEY_ID and\n"
        "AWS_SECRET_ACCESS_KEY env variables."
    )
    raise

try:
    bucket = conn.get_bucket(bucket_name)
except boto.exception.S3ResponseError:
    sys.stderr.write(
        "Error: bucket {0} not found or access denied\n".format(bucket_name)
    )
    sys.exit(2)
def restore_process(queue):
    """Worker loop: pull batches of key names off *queue* and request a
    Glacier restore for each, until a ``None`` sentinel arrives.

    Opens its own S3 connection, since boto connection objects should not
    be shared across processes. Relies on the module-level ``bucket_name``
    and ``days`` globals (inherited by the child process at fork time).
    """
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name)

    while True:
        items = queue.get()
        if items is None:
            # Sentinel from the parent: no more work for this worker.
            break
        for item in items:
            key = bucket.get_key(item)
            # Guard against keys deleted between listing and restore:
            # get_key() returns None for a missing key, and the original
            # code crashed the worker on the attribute access.
            if key is not None and not key.ongoing_restore:
                key.restore(days)
            # print() form works on both Python 2 and 3 for a single arg.
            print(item)
queue = multiprocessing.Queue()

# Start the worker pool, keeping references so we can join the processes
# at the end -- the original started them anonymously and the parent could
# exit without ever waiting on the children.
workers = []
for _ in range(NUM_PROCS):
    proc = multiprocessing.Process(target=restore_process, args=[queue])
    proc.start()
    workers.append(proc)

# Enqueue GLACIER-class key names in batches to cut down on queue traffic.
items = []
for key in bucket.list(prefix=prefix):
    if key.storage_class == "GLACIER":
        items.append(key.name)
        if len(items) > 128:
            queue.put(items)
            items = []

# Flush any final partial batch.
if items:
    queue.put(items)

# One None sentinel per worker tells each process to exit its loop.
for _ in range(NUM_PROCS):
    queue.put(None)

print("waiting")

# Actually wait for the workers to drain the queue before exiting.
for proc in workers:
    proc.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment