Skip to content

Instantly share code, notes, and snippets.

@njbair
Created November 29, 2013 19:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save njbair/7710734 to your computer and use it in GitHub Desktop.
Save njbair/7710734 to your computer and use it in GitHub Desktop.
Sets a Cache-Control HTTP header on all matching objects in an Amazon S3 bucket
#!/usr/bin/python
# Amazon S3 Cache-Control Processor
#
# Programmatically applies the specified Cache-Control metadata to all
# matching objects (files) in an S3 bucket
#
# Uses the excellent boto library <http://docs.pythonboto.org>
from boto.s3.connection import S3Connection
from boto.s3.key import Key
import boto
import re
class Config:
    """Static container for the script's configuration.

    The class is never instantiated; its attributes are read directly
    (``Config.bucket``, ``Config.ignore``, ``Config.cache_control``).
    """

    # the name of the S3 bucket
    bucket = 'gracechurchmentor'

    # Regular expressions that trigger a skip; each is tried with
    # re.search against the object name. Useful for blacklisting files
    # which should not be processed. Keep the trailing comma so that
    # adding an entry cannot silently concatenate two adjacent strings.
    ignore = (
        r'^logs',  # ignore logfiles
        r'^SRS',   # ignore Steve's files
    )

    # value for the Cache-Control header
    cache_control = 'max-age=259200, public'
def _is_ignored(name, patterns):
    """Return True if *name* matches any regular expression in *patterns*."""
    return any(re.search(pattern, name) for pattern in patterns)


def _apply_cache_control(bucket, name, cache_control):
    """Store *cache_control* on the S3 object *name*; return True on success.

    Key.set_metadata() only changes the local Key object, so the header
    is written by copying the object onto itself with replacement
    metadata. Supplying metadata to a copy replaces all of the object's
    existing metadata, so the current values are read first and carried
    over.

    NOTE(review): the copy is issued without a storage-class argument;
    confirm that is acceptable for objects not stored as STANDARD.
    """
    # Keys yielded by bucket.list() carry no header metadata; get_key
    # issues a HEAD request that populates it.
    key = bucket.get_key(name)
    if key is None:
        # the object disappeared between listing and processing
        return False

    metadata = dict(key.metadata)
    carried_over = (
        ('Content-Type', 'content_type'),
        ('Content-Encoding', 'content_encoding'),
        ('Content-Disposition', 'content_disposition'),
    )
    for header, attribute in carried_over:
        value = getattr(key, attribute, None)
        if value:
            metadata[header] = value
    metadata['Cache-Control'] = cache_control

    # preserve_acl keeps the existing permissions; a plain copy would
    # otherwise reset the object's ACL.
    key.copy(bucket.name, name, metadata=metadata, preserve_acl=True)

    # verify against what S3 now reports rather than the local object
    refreshed = bucket.get_key(name)
    return refreshed is not None and refreshed.cache_control == cache_control


def main():
    """Apply Config.cache_control to every non-ignored object in the bucket.

    Connects to S3 with boto's default credentials, walks every object
    in Config.bucket, skips names matching any pattern in Config.ignore,
    and reports "Success!" or "Failed!" for each object processed.
    """
    connection = boto.connect_s3()
    bucket = connection.get_bucket(Config.bucket)

    # S3 buckets don't technically have hierarchical directory
    # structures; object names merely carry pseudo-directory prefixes.
    # All objects live in a single flat namespace, so one pass over the
    # listing reaches every file without any recursion.
    for key in bucket.list():
        # only proceed if no ignore pattern matched
        if _is_ignored(key.name, Config.ignore):
            continue

        print("Modifying %s..." % key.name)
        if _apply_cache_control(bucket, key.name, Config.cache_control):
            print("Success!")
        else:
            print("Failed!")
# Run the processor. This call is unconditional, so it executes when the
# file is imported as well as when it is run directly.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment