Skip to content

Instantly share code, notes, and snippets.

Forked from mattbillenstein/s3put
Created May 31, 2013 06:45
Show Gist options
  • Save rubiojr/5683295 to your computer and use it in GitHub Desktop.
Save rubiojr/5683295 to your computer and use it in GitHub Desktop.
s3put #s3 #python
#!/usr/bin/env python
import gevent.monkey
# The module was imported but never used.  Patching has to happen before boto
# (and the stdlib socket/ssl modules it pulls in) is imported; otherwise the
# blocking network calls made inside the upload greenlets never yield and the
# pool cannot overlap part uploads.
gevent.monkey.patch_all()
import boto
import config
import gevent
import gevent.pool
import os
import sys
import time
import traceback
import optparse
from cStringIO import StringIO
CHUNK_SIZE = 5 * 1024 * 1024
def get_connection():
    """Open a new S3 connection using the credentials held in ``config``.

    Returns:
        The connection object returned by ``boto.connect_s3``.
    """
    # NOTE(review): the mapping these keys are read from was lost from the
    # original expression -- only the ``['aws_access_key_id']`` style
    # subscripts survived, which handed one-element lists to boto instead of
    # the credentials.  ``config.aws`` is assumed here; confirm the attribute
    # name against the project's config module.
    credentials = config.aws
    return boto.connect_s3(
        aws_access_key_id=credentials['aws_access_key_id'],
        aws_secret_access_key=credentials['aws_secret_access_key'],
    )
def upload_part(mp, fname, idx, offset):
    """Upload one chunk of ``fname`` as a part of the multipart upload ``mp``.

    The chunk is read into memory once and then sent with up to three
    attempts; every failed attempt is reported on stdout with its traceback.

    Args:
        mp: Multipart upload handle; its ``bucket_name``, ``key_name`` and
            ``id`` attributes are read.
        fname: Path of the local file being uploaded.
        idx: Zero-based chunk index; the part is sent as part number
            ``idx + 1``.
        offset: Byte offset in ``fname`` at which this chunk starts.

    Raises:
        AssertionError: If every attempt failed.
    """
    # Binary mode keeps the bytes intact on every platform, and the context
    # manager closes the handle even if the read fails.
    with open(fname, 'rb') as f:
        f.seek(offset)
        content = f.read(CHUNK_SIZE)

    attempts = 3
    for _ in range(attempts):
        try:
            # Each attempt opens its own connection and builds its own
            # MultiPartUpload object pointing at the upload started by the
            # caller.
            conn = get_connection()
            bucket = conn.lookup(mp.bucket_name)
            p = boto.s3.multipart.MultiPartUpload(bucket)
            # NOTE(review): reconstructed -- the original line was garbled
            # after the constructor call.  Without the upload id the part
            # cannot be attached to ``mp``.
            p.id = mp.id
            p.key_name = mp.key_name
            p.upload_part_from_file(StringIO(content), idx + 1, replace=True)
            return
        except Exception:
            # Retry boundary: report the failure and try again.
            print("Error in part upload - %s %s %s" % (fname, idx, offset))
            print(traceback.format_exc())

    # Raised explicitly (rather than via ``assert``) so the failure is still
    # reported when Python runs with -O; the exception type is unchanged.
    raise AssertionError("Part failed - %s %s %s" % (fname, idx, offset))
def upload(options):
    """Upload every file in ``options.files`` to the bucket ``options.bucket``.

    Files larger than two chunks are sent as a multipart upload whose parts
    are uploaded concurrently through a gevent pool (when
    ``options.concurrency`` is greater than one); everything else is sent
    with a single put.  A progress line with size, duration and throughput is
    printed per file.

    Args:
        options: Object providing ``bucket``, ``path``, ``files``,
            ``concurrency`` and ``reduced_redundancy``.

    Raises:
        RuntimeError: If any part of a multipart upload failed; the multipart
            upload is cancelled before raising.
    """
    conn = get_connection()
    bck = conn.create_bucket(options.bucket)
    pool = gevent.pool.Pool(options.concurrency)

    for fname in options.files:
        base = os.path.basename(fname)
        # A path of '.' means "store at the top level of the bucket".
        if options.path == '.':
            fpath = base
        else:
            fpath = os.path.join(options.path, base)

        msg = "Putting: %s -> %s/%s ..." % (fname, options.bucket, fpath)
        # No newline yet: the result is appended to the same line below.  The
        # single trailing space stands in for the separator the Python 2
        # ``print x,`` form inserted before the next print.
        sys.stdout.write("%-80s " % msg)
        sys.stdout.flush()

        start = time.time()
        size = os.stat(fname).st_size

        if size > (CHUNK_SIZE * 2) and options.concurrency > 1:
            mp = bck.initiate_multipart_upload(
                fpath, reduced_redundancy=options.reduced_redundancy)
            greenlets = [
                pool.spawn(upload_part, mp, fname, idx, offset)
                for idx, offset in enumerate(range(0, size, CHUNK_SIZE))
            ]
            # Every part must have finished before the upload is completed.
            gevent.joinall(greenlets)
            if not all(g.successful() for g in greenlets):
                # Do not complete an upload that is missing parts.
                mp.cancel_upload()
                raise RuntimeError("Multipart upload failed - %s" % fname)
            mp.complete_upload()
        else:
            key = bck.new_key(fpath)
            with open(fname, 'rb') as f:
                key.set_contents_from_file(
                    f,
                    reduced_redundancy=options.reduced_redundancy,
                    replace=True)

        mib = float(size) / 1024 / 1024
        elapsed = time.time() - start
        # MiB -> KiB is a factor of 1024; guard against a zero-length
        # interval on coarse clocks.
        rate = int(mib * 1024 / elapsed) if elapsed > 0 else 0
        print(" %6.1f MiB in %.1fs (%d KiB/s)" % (mib, elapsed, rate))
def main(argv):
    """Parse the command line and upload the requested files.

    Expected positional arguments: ``<bucket> <path> <files...>``.  When
    fewer than three are given, the help text is printed and the process
    exits with status 1.

    Args:
        argv: Accepted for the conventional ``main(sys.argv)`` call; the
            parser itself is invoked without arguments, so optparse reads
            ``sys.argv[1:]``.
    """
    parser = optparse.OptionParser()
    parser.set_usage('%prog [options] <bucket> <path> <files>')
    parser.add_option(
        '-c', '--concurrency', dest='concurrency', type='int',
        help='Number of parts to upload simultaneously', default=3)
    parser.add_option(
        '-r', '--reduced_redundancy', dest='reduced_redundancy',
        help='Use S3 reduced redundancy', action='store_true', default=False)
    options, args = parser.parse_args()

    # Bucket, path and at least one file are required.
    if len(args) < 3:
        parser.print_help()
        sys.exit(1)

    # The positional arguments are carried on the options object so that
    # upload() receives a single argument.
    options.bucket = args[0]
    options.path = args[1]
    options.files = args[2:]

    upload(options)
if __name__ == '__main__':
    # Script entry point: hand the process arguments to main().
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment