Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Sets a Cache-Control HTTP header on all matching objects in an Amazon S3 bucket
# Amazon S3 Cache-Control Processor
# Programmatically applies the specified Cache-Control metadata to all
# matching objects (files) in an S3 bucket
# Uses the excellent boto library <https://github.com/boto/boto>
from boto.s3.connection import S3Connection
from boto.s3.key import Key
import boto
import re
# A static class to store configuration data
class Config:
    """Static configuration for the S3 Cache-Control processor."""

    # the name of the S3 bucket to process
    bucket = 'gracechurchmentor'

    # a list of regular expressions to trigger a skip; useful for
    # blacklisting files which you don't want to process
    ignore = (
        '^logs',  # ignore logfiles
        '^SRS',   # ignore Steve's files
    )

    # value applied as the Cache-Control header on each object
    cache_control = 'max-age=259200, public'
def main():
# instantiate the S3 connection
c = S3Connection()
c = boto.connect_s3()
b = c.get_bucket(Config.bucket)
# get a list of all files in the bucket
objects = b.list()
# iterate through the objects (files) in the list
# S3 buckets don't technically have hierarchical directory
# structures; rather, object names are prepended with
# pseudo-directory names for easier organization. But in fact, all
# objects exist within a single top-level, making it very easy to
# iterate through them without worrying about recursive functions
# and all that fun stuff. Yay!
for object in objects:
skip = False
# check each object against the ignore list
for pattern in Config.ignore:
# perform the search
match =,
if (match):
skip = True
# only proceed if no matches were found
if (not skip):
print "Modifying %s..." % (
object.set_metadata('cache-control', Config.cache_control)
if (object.get_metadata('cache-control') == Config.cache_control):
print "Success!"
print "Failed!"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.