dpwrussell/s3cache.py

## s3cache.py
import boto3
from botocore.exceptions import ClientError

import hashlib
import os
import errno

def mkdir_p(path):
    """Create ``path`` together with any missing parent directories.

    Mirrors ``mkdir -p``: when ``path`` already exists as a directory the
    call does nothing. Every other ``OSError`` raised by ``os.makedirs``
    (including ``EEXIST`` when ``path`` is not a directory) is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_a_dir = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_dir:
            raise

def hash_file(path, chunk_size=1024 * 1024):
    """Return the SHA-1 hex digest of the file at ``path``.

    The file is opened in binary mode and fed to the hash ``chunk_size``
    bytes at a time, so memory use stays bounded however large the file is.

    Raises ``ValueError`` if ``chunk_size`` is not positive, and whatever
    ``open``/``read`` raise if the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) returns b'' immediately, which would silently hash nothing.
        raise ValueError('chunk_size must be a positive integer')

    sha1 = hashlib.sha1()
    with open(path, 'rb') as f:
        # iter() with a sentinel keeps calling f.read until it returns b''
        # (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha1.update(chunk)
    return sha1.hexdigest()

# Local directory under which downloaded copies of S3 objects are stored.
cache_dir = '/tmp/s3cache/'
# boto3 handles for S3: the resource API (used to build `bucket`) and the
# low-level client (used for downloads).
s3 = boto3.resource('s3')
s3_client = boto3.client('s3')
# The 'dpwr' bucket, in which objects are looked up by key.
bucket = s3.Bucket('dpwr')

def cache_file(path, fullpath):
    """Download the S3 object with key ``path`` to the local file ``fullpath``.

    Missing parent directories of ``fullpath`` are created before the
    download starts. Errors from directory creation or from the download
    propagate to the caller.
    """
    parent = os.path.dirname(fullpath)
    # dirname is '' for a bare filename; os.makedirs('') would raise, and the
    # current directory needs no creating.
    if parent:
        mkdir_p(parent)
    # Take the bucket name from the module-level `bucket` handle instead of
    # repeating the literal, so the two cannot drift apart.
    s3_client.download_file(bucket.name, path, fullpath)

def get_file(path):
    """Return the local path of a cached copy of the S3 object ``path``.

    ``path`` is the object's key in the module-level ``bucket``; the local
    copy lives at the same relative path under ``cache_dir``. The object is
    downloaded when there is no local copy, or when the local file's SHA-1
    differs from the ``sha1`` entry of the object's S3 metadata (an object
    without that entry is always re-downloaded, since it cannot be verified).

    If the lookup in S3 fails with a ``ClientError``, a message is printed
    and ``SystemExit(1)`` is raised.
    """
    fullpath = os.path.join(cache_dir, path)

    obj = bucket.Object(path)
    try:
        # load() issues a HEAD request: it confirms the object is reachable
        # and fills in obj.metadata, without opening a body stream that
        # would then be left unread.
        obj.load()
    except ClientError:
        # Any ClientError from the lookup is reported as a missing file.
        print('File %s not in S3' % path)
        # Raised directly: the `exit` builtin is only installed by `site`.
        raise SystemExit(1)

    # .get() yields None when the object carries no 'sha1' entry; None never
    # equals a hex digest, so an unverifiable copy is refreshed.
    cached = os.path.isfile(fullpath)
    if not cached or obj.metadata.get('sha1') != hash_file(fullpath):
        cache_file(path, fullpath)

    return fullpath

# Script entry: fetch a sample object and print the path of its cached copy.
# Parenthesised so the line parses on Python 3 as well as Python 2.
print(get_file('s3test/hs.tif'))
	import boto3
	from botocore.exceptions import ClientError

	import hashlib
	import os
	import errno

	def mkdir_p(path):
	    """Create ``path`` together with any missing parent directories.

	    Mirrors ``mkdir -p``: when ``path`` already exists as a directory the
	    call does nothing. Every other ``OSError`` raised by ``os.makedirs``
	    (including ``EEXIST`` when ``path`` is not a directory) is re-raised.
	    """
	    try:
	        os.makedirs(path)
	    except OSError as e:
	        if e.errno == errno.EEXIST and os.path.isdir(path):
	            pass
	        else:
	            raise

	def hash_file(path, chunk_size=1024 * 1024):
	    """Return the SHA-1 hex digest of the file at ``path``.

	    The file is opened in binary mode and fed to the hash ``chunk_size``
	    bytes at a time, so memory use stays bounded however large the file is.

	    Raises ``ValueError`` if ``chunk_size`` is not positive, and whatever
	    ``open``/``read`` raise if the file cannot be opened or read.
	    """
	    if chunk_size < 1:
	        # read(0) returns b'' immediately, which would silently hash nothing.
	        raise ValueError('chunk_size must be a positive integer')

	    sha1 = hashlib.sha1()
	    with open(path, 'rb') as f:
	        # iter() with a sentinel keeps calling f.read until it returns b''
	        # (end of file).
	        for chunk in iter(lambda: f.read(chunk_size), b''):
	            sha1.update(chunk)
	    return sha1.hexdigest()

	# Local directory under which downloaded copies of S3 objects are stored.
	cache_dir = '/tmp/s3cache/'
	# boto3 handles for S3: the resource API (used to build `bucket`) and the
	# low-level client (used for downloads).
	s3 = boto3.resource('s3')
	s3_client = boto3.client('s3')
	# The 'dpwr' bucket, in which objects are looked up by key.
	bucket = s3.Bucket('dpwr')

	def cache_file(path, fullpath):
	    """Download the S3 object with key ``path`` to the local file ``fullpath``.

	    Missing parent directories of ``fullpath`` are created before the
	    download starts. Errors from directory creation or from the download
	    propagate to the caller.
	    """
	    parent = os.path.dirname(fullpath)
	    # dirname is '' for a bare filename; os.makedirs('') would raise, and
	    # the current directory needs no creating.
	    if parent:
	        mkdir_p(parent)
	    # Take the bucket name from the module-level `bucket` handle instead of
	    # repeating the literal, so the two cannot drift apart.
	    s3_client.download_file(bucket.name, path, fullpath)

	def get_file(path):
	    """Return the local path of a cached copy of the S3 object ``path``.

	    ``path`` is the object's key in the module-level ``bucket``; the local
	    copy lives at the same relative path under ``cache_dir``. The object is
	    downloaded when there is no local copy, or when the local file's SHA-1
	    differs from the ``sha1`` entry of the object's S3 metadata (an object
	    without that entry is always re-downloaded, since it cannot be
	    verified).

	    If the lookup in S3 fails with a ``ClientError``, a message is printed
	    and ``SystemExit(1)`` is raised.
	    """
	    fullpath = os.path.join(cache_dir, path)

	    obj = bucket.Object(path)
	    try:
	        # load() issues a HEAD request: it confirms the object is reachable
	        # and fills in obj.metadata, without opening a body stream that
	        # would then be left unread.
	        obj.load()
	    except ClientError:
	        # Any ClientError from the lookup is reported as a missing file.
	        print('File %s not in S3' % path)
	        # Raised directly: the `exit` builtin is only installed by `site`.
	        raise SystemExit(1)

	    # .get() yields None when the object carries no 'sha1' entry; None never
	    # equals a hex digest, so an unverifiable copy is refreshed.
	    cached = os.path.isfile(fullpath)
	    if not cached or obj.metadata.get('sha1') != hash_file(fullpath):
	        cache_file(path, fullpath)

	    return fullpath

	# Script entry: fetch a sample object and print the path of its cached copy.
	# Parenthesised so the line parses on Python 3 as well as Python 2.
	print(get_file('s3test/hs.tif'))