Skip to content

Instantly share code, notes, and snippets.

@danodonovan
Created July 18, 2018 10:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danodonovan/bd8abebe1ae4c42ba9558e1da4c2ff05 to your computer and use it in GitHub Desktop.
Save danodonovan/bd8abebe1ae4c42ba9558e1da4c2ff05 to your computer and use it in GitHub Desktop.
Script to restore items deleted from a versioned S3 bucket
import logging
import boto3
from healx.batching import batched
from healx.iterables import flatten
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__file__)
client = boto3.client('s3')
def deleted_object_markers(bucket, prefix):
    """Collect every delete marker stored under ``prefix`` in ``bucket``.

    Walks all pages of ``list_object_versions`` for the given bucket and
    prefix and gathers the ``DeleteMarkers`` entries of each page.

    Args:
        bucket: Name of the versioned S3 bucket to inspect.
        prefix: Key prefix that limits the listing.

    Returns:
        A list with the delete-marker entries of every response page, in
        the order the service returned them. The list is empty when no
        delete markers exist under the prefix.
    """
    # The paginator forwards both NextKeyMarker and NextVersionIdMarker
    # between requests. Resuming with the key marker alone can drop the
    # remaining versions of a key when a page is cut in the middle of it.
    paginator = client.get_paginator('list_object_versions')

    markers = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # 'DeleteMarkers' is absent from pages that hold no delete markers,
        # so a plain index lookup would raise KeyError here.
        markers.extend(page.get('DeleteMarkers', []))
    return markers
def delete_deleted_object_markers(bucket, keys):
    """Delete the given delete markers from ``bucket``.

    Removing a delete marker is what makes the previously deleted object
    visible again in a versioned bucket.

    Args:
        bucket: Name of the versioned S3 bucket.
        keys: Iterable of ``(key, version_id)`` pairs, one per delete
            marker to remove.

    Returns:
        The flattened ``Deleted`` entries of all ``delete_objects``
        responses (whatever ``flatten`` produces for the list of
        per-batch lists). Entries that the service reported under
        ``Errors`` are logged and not included.
    """
    def _delete_objects(keys_and_ids):
        # NB: a single delete_objects request accepts at most 1000 keys.
        payload = {
            'Objects': [
                {'Key': key, 'VersionId': version_id}
                for key, version_id in keys_and_ids
            ],
            'Quiet': False,
        }
        return client.delete_objects(Bucket=bucket, Delete=payload)

    deleted_per_batch = []
    for batch_of_keys in batched(keys, batch_size=1000):
        response = _delete_objects(batch_of_keys)

        # Failures are reported per key inside a successful response;
        # surface them instead of silently dropping them.
        for error in response.get('Errors', []):
            logger.error(
                "Failed to delete marker %s (version %s): %s %s",
                error.get('Key'),
                error.get('VersionId'),
                error.get('Code'),
                error.get('Message'),
            )

        # 'Deleted' is missing when every entry of the batch failed.
        deleted_per_batch.append(response.get('Deleted', []))

    return flatten(deleted_per_batch)
if __name__ == '__main__':
    # Script entry point: find the delete markers under a prefix, report
    # how many there are and their date range, then remove them so the
    # underlying objects become visible again.
    from operator import itemgetter

    bucket = '<your bucket name>'
    prefix = '<path to deleted files in bucket>'

    objects = deleted_object_markers(bucket, prefix)

    if not objects:
        # min()/max() raise ValueError on an empty sequence, and there is
        # nothing to restore anyway.
        logger.info("No deleted objects found, nothing to restore")
    else:
        last_modified = list(map(itemgetter('LastModified'), objects))
        logger.info("{n} deleted objects found, with date ranges {min_date} - {max_date}".format(
            n=len(objects),
            min_date=min(last_modified),
            max_date=max(last_modified),
        ))

        # Consider commenting out the lines below while checking that the
        # script is going to restore the correct objects.
        object_keys_and_ids = [(object_['Key'], object_['VersionId']) for object_ in objects]
        deleted_objects = delete_deleted_object_markers(bucket, object_keys_and_ids)
        logger.info("Restored {n} objects".format(n=len(deleted_objects)))
@danodonovan
Copy link
Author

NB: You can't easily restore an object once it's been deleted. Deleting the DeleteMarkers was non-obvious (to me at least), but it is described in the AWS docs; cf. https://docs.aws.amazon.com/AmazonS3/latest/user-guide/undelete-objects.html

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment