njbair/s3cachecontrol.py

## s3cachecontrol.py
#!/usr/bin/python

# Amazon S3 Cache-Control Processor
#
# Programmatically applies the configured Cache-Control metadata to every
# object (file) in an S3 bucket whose name matches none of the ignore patterns
#
# Uses the excellent boto library <http://docs.pythonboto.org>


from boto.s3.connection import S3Connection
from boto.s3.key import Key
import boto
import re


# A static class to store configuration data
class Config:
        """Settings for the script, read directly as class attributes."""

        # Cache-Control header value to apply to each processed object
        # (259200 seconds = 3 days)
        cache_control = 'max-age=259200, public'

        # regular expressions searched for in each object name; an object
        # whose name matches any of them is skipped
        ignore = (
                r'^logs',  # log files
                r'^SRS',  # Steve's files
        )

        # name of the S3 bucket to process
        bucket = 'gracechurchmentor'


def _apply_cache_control(bucket, key):
        """Store Config.cache_control on one S3 object.

        Key.set_metadata() only changes the local Key object, and S3 has no
        call to edit the metadata of a stored object in place, so the object
        is copied onto itself with a replacement set of headers.

        bucket -- the boto Bucket that holds the object
        key    -- the boto Key for the object, as yielded by bucket.list()

        Returns True when a fresh lookup of the object reports the expected
        Cache-Control value, False otherwise. Errors raised by boto during
        the lookup or the copy are not caught here.
        """
        # keys produced by a bucket listing carry no metadata, so look the
        # object up again to learn its current headers
        current = bucket.get_key(key.name)
        if current is None:
                # the object disappeared after it was listed
                return False

        # a copy with explicit metadata REPLACES everything stored on the
        # object, so carry the existing values over before adding ours
        metadata = dict(current.metadata)
        preserved = (
                'Content-Type',
                'Content-Encoding',
                'Content-Disposition',
                'Content-Language',
                'Expires',
        )
        for header in preserved:
                value = getattr(current, header.lower().replace('-', '_'), None)
                if value:
                        metadata[header] = value
        metadata['Cache-Control'] = Config.cache_control

        # preserve_acl keeps the object's permissions; without it the copy
        # would fall back to the default ACL
        # NOTE(review): a self-copy also refreshes Last-Modified - confirm
        # that is acceptable for this bucket
        key.copy(bucket.name, key.name, metadata=metadata, preserve_acl=True)

        # verify against what S3 now reports, not against local state
        updated = bucket.get_key(key.name)
        return updated is not None and updated.cache_control == Config.cache_control


def main():
        """Apply Config.cache_control to every non-ignored object in the bucket.

        Lists all objects in Config.bucket, skips those whose name matches
        any pattern in Config.ignore, and prints a "Modifying ..." line
        followed by "Success!" or "Failed!" for each remaining object.
        """
        # connect_s3() is called without arguments, so credentials have to
        # come from boto's own configuration
        conn = boto.connect_s3()
        bucket = conn.get_bucket(Config.bucket)

        # S3 buckets don't technically have hierarchical directory
        # structures; object names merely carry pseudo-directory prefixes.
        # All objects live in a single top level, so one flat pass over the
        # listing reaches every object without any recursion.
        for key in bucket.list():
                # skip the object as soon as one ignore pattern matches
                if any(re.search(pattern, key.name) for pattern in Config.ignore):
                        continue

                # single-argument print(...) behaves the same on Python 2 and 3
                print("Modifying %s..." % (key.name))

                if _apply_cache_control(bucket, key):
                        print("Success!")
                else:
                        print("Failed!")


# run only when executed as a script, so that importing this module
# (e.g. to reuse Config) does not start processing the bucket
if __name__ == '__main__':
        main()
	#!/usr/bin/python

	# Amazon S3 Cache-Control Processor
	#
	# Programmatically applies the specified Cache-Control metadata to all
	# matching objects (files) in an S3 bucket
	#
	# Uses the excellent boto library <http://docs.pythonboto.org>



	from boto.s3.connection import S3Connection
	from boto.s3.key import Key
	import boto
	import re



	# A static class to store configuration data
	class Config:
		"""Settings for the script, read directly as class attributes."""

		# the name of the S3 bucket
		bucket = 'gracechurchmentor'

		# a list of Regular Expressions to trigger a skip. Useful for
		# blacklisting files which you don't want to process
		ignore = (
			'^logs', # ignore logfiles
			'^SRS' # ignore Steve's files
		)

		# value for Cache-Control header
		cache_control = 'max-age=259200, public'



	def _apply_cache_control(bucket, key):
		"""Store Config.cache_control on one S3 object.

		Key.set_metadata() only changes the local Key object, and S3 has no
		call to edit the metadata of a stored object in place, so the object
		is copied onto itself with a replacement set of headers.

		bucket -- the boto Bucket that holds the object
		key    -- the boto Key for the object, as yielded by bucket.list()

		Returns True when a fresh lookup of the object reports the expected
		Cache-Control value, False otherwise. Errors raised by boto during
		the lookup or the copy are not caught here.
		"""
		# keys produced by a bucket listing carry no metadata, so look the
		# object up again to learn its current headers
		current = bucket.get_key(key.name)
		if current is None:
			# the object disappeared after it was listed
			return False

		# a copy with explicit metadata REPLACES everything stored on the
		# object, so carry the existing values over before adding ours
		metadata = dict(current.metadata)
		preserved = (
			'Content-Type',
			'Content-Encoding',
			'Content-Disposition',
			'Content-Language',
			'Expires',
		)
		for header in preserved:
			value = getattr(current, header.lower().replace('-', '_'), None)
			if value:
				metadata[header] = value
		metadata['Cache-Control'] = Config.cache_control

		# preserve_acl keeps the object's permissions; without it the copy
		# would fall back to the default ACL
		# NOTE(review): a self-copy also refreshes Last-Modified - confirm
		# that is acceptable for this bucket
		key.copy(bucket.name, key.name, metadata=metadata, preserve_acl=True)

		# verify against what S3 now reports, not against local state
		updated = bucket.get_key(key.name)
		return updated is not None and updated.cache_control == Config.cache_control


	def main():
		"""Apply Config.cache_control to every non-ignored object in the bucket.

		Lists all objects in Config.bucket, skips those whose name matches
		any pattern in Config.ignore, and prints a "Modifying ..." line
		followed by "Success!" or "Failed!" for each remaining object.
		"""
		# connect_s3() is called without arguments, so credentials have to
		# come from boto's own configuration
		conn = boto.connect_s3()
		bucket = conn.get_bucket(Config.bucket)

		# S3 buckets don't technically have hierarchical directory
		# structures; object names merely carry pseudo-directory prefixes.
		# All objects live in a single top level, so one flat pass over the
		# listing reaches every object without any recursion.
		for key in bucket.list():
			# skip the object as soon as one ignore pattern matches
			if any(re.search(pattern, key.name) for pattern in Config.ignore):
				continue

			# single-argument print(...) behaves the same on Python 2 and 3
			print("Modifying %s..." % (key.name))

			if _apply_cache_control(bucket, key):
				print("Success!")
			else:
				print("Failed!")



	# run only when executed as a script, so that importing this module
	# (e.g. to reuse Config) does not start processing the bucket
	if __name__ == '__main__':
		main()