
@mr337
Created September 20, 2013 21:01
Used to calculate daily totals of changed files and their sizes. Great for determining how much data may have to be synced to offsite backups.
import aniso8601
from boto.s3.connection import S3Connection
from datetime import datetime, timedelta
import pytz
from hurry.filesize import size

con = S3Connection()
bucket = con.get_bucket('britecorepro')

# days to look back
DAYS = 14

# pre-seed one list per day covering the look-back window
files = {}
beginning_dt = datetime.utcnow().replace(tzinfo=pytz.utc) - timedelta(days=DAYS)
for i in range(0, DAYS + 1):
    files.setdefault((beginning_dt + timedelta(days=i)).strftime('%m/%d/%y'), [])
print files

# iterate through all files within the bucket
count = 0
for f in bucket.list():
    last_modified = aniso8601.parse_datetime(f.last_modified)
    if last_modified > beginning_dt:
        files[last_modified.strftime('%m/%d/%y')].append({'key': f.etag,
                                                          'last_modified': f.last_modified,
                                                          'size': int(f.size),
                                                          'version': f.version_id})
        count += 1
        if count % 2000 == 0:
            print 'Files tracking {0}'.format(count)

# totals
for d in files:
    if len(files[d]) > 0:
        print 'For {0}, Total Files: {1} Total Size: {2}'.format(
            d, len(files[d]), size(sum(map(lambda a: a['size'], files[d]))))
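Calling S3Connection() with no arguments makes boto look up AWS credentials from the environment (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY) or a boto config file such as ~/.boto. Below is a minimal sketch of an alternative setup, assuming placeholder credentials passed in explicitly and a hypothetical 'backups/' key prefix to narrow the scan on very large buckets; both parameters are standard boto arguments, but the key and prefix values are only illustrations.

from boto.s3.connection import S3Connection

# Explicit credentials (placeholders, not real keys).
con = S3Connection(aws_access_key_id='AKIA...EXAMPLE',
                   aws_secret_access_key='...SECRET...')
bucket = con.get_bucket('britecorepro')

# Restricting the listing to a key prefix means only keys under that
# prefix are iterated, which shortens the scan on big buckets.
for f in bucket.list(prefix='backups/'):
    print f.name, int(f.size), f.last_modified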