Skip to content

Instantly share code, notes, and snippets.

@nikolajbaer
Last active October 21, 2015 04:45
Show Gist options
  • Save nikolajbaer/3093312 to your computer and use it in GitHub Desktop.
Save nikolajbaer/3093312 to your computer and use it in GitHub Desktop.
Sync over S3 buckets
from boto.s3.connection import S3Connection as Connection
import sys,time,threading
# Command line: <script> SOURCE_BUCKET DESTINATION_BUCKET
# Fail with a usage message instead of an opaque tuple-unpacking ValueError
# when the wrong number of arguments is given.
if len(sys.argv) != 3:
    sys.exit("usage: %s SOURCE_BUCKET DESTINATION_BUCKET" % sys.argv[0])
source, destination = sys.argv[1:]

# One connection per side; both are created with default arguments.
srcconnection = Connection()
dstconnection = Connection()

# From here on `source` and `destination` are bucket objects, not names.
source = srcconnection.get_bucket(source)
# Reuse the destination bucket when lookup() finds it, otherwise create it.
destination = dstconnection.lookup(destination) or dstconnection.create_bucket(destination)

print("Building file list...")
# Materialise the complete source listing up front; the worker threads pop
# from this list. To sync only part of the bucket, pass a prefix to
# source.list(), e.g. source.list(prefix="images/categories").
items = list(source.list())
print("%i items found. removing existing keys in destination from list" % len(items))
print("Verifying keys..")

# Source keys that turned out to be missing from the destination.
final_items = []
# src_lock guards `items`; dest_lock guards `final_items`.
src_lock = threading.Lock()
dest_lock = threading.Lock()
def dest_name(src_name):
    """Return the destination key name to use for the source key `src_name`.

    This is currently the identity mapping: keys keep their name in the
    destination bucket.
    """
    # To relocate keys under a prefix, return something like
    # "oldsite/media/" + src_name here instead.
    target_name = src_name
    return target_name
def verify_key():
    """Worker loop: move source keys missing from the destination to `final_items`.

    Pops keys off the shared `items` list until it is empty. For each key the
    destination bucket is asked for the corresponding name (see `dest_name`);
    when that lookup returns a falsy value the key is appended to
    `final_items`. A progress line is printed whenever the per-thread counter
    satisfies ``count % 100 == 99``.
    """
    processed = 0
    while True:
        # The emptiness check and the pop must happen under the same lock
        # hold. Checking outside the lock lets another worker drain the list
        # in between, so pop() would raise IndexError and kill this thread.
        with src_lock:
            if not items:
                return
            n = items.pop()
        if n is None:
            continue
        processed += 1
        if processed % 100 == 99:
            print("%s processed %i items" % (threading.currentThread().getName(), processed))
        if not destination.get_key(dest_name(n.name)):
            # `with` guarantees the lock is released even if append fails.
            with dest_lock:
                final_items.append(n)
# Fan the destination lookups out over 50 worker threads and wait for all of
# them to finish before reporting what is left to copy.
threads = [threading.Thread(target=verify_key, name="t%i" % i) for i in range(50)]
for t in threads:
    t.start()
for t in threads:
    t.join()

pending_count = len(final_items)
print("reduced target list to %i items" % pending_count)
print("preparing to copy %i items from %s to %s" % (pending_count, source, destination))
print("Hit CTRL-C to cancel now!")
#sys.exit(1)
# Short pause so the operator has a chance to abort before copying starts.
time.sleep(5)

# Guards `final_items` while the copy workers drain it.
copy_lock = threading.Lock()
def copy_key():
    """Worker loop: copy every key in `final_items` into the destination bucket.

    Pops keys off the shared `final_items` list until it is empty and copies
    each one from the `source` bucket to `destination` under the name given
    by `dest_name`, passing ``preserve_acl=True``. Prints one line per copied
    key, plus a progress line whenever the per-thread counter satisfies
    ``count % 100 == 99``.
    """
    copied = 0
    while True:
        # Check-and-pop under a single lock hold: testing the length outside
        # the lock races with the other workers and can end in pop() raising
        # IndexError on an already-drained list.
        with copy_lock:
            if not final_items:
                return
            src_key = final_items.pop()
        if src_key is None:
            continue
        copied += 1
        if copied % 100 == 99:
            print("%s copied %i items" % (threading.currentThread().getName(), copied))
        target_name = dest_name(src_key.name)
        # The returned key object is not needed, so it is not kept.
        destination.copy_key(target_name, source.name, src_key.name, preserve_acl=True)
        print("copying %s from %s to %s %s" % (src_key, source, destination, target_name))
# Run the copy phase on 50 worker threads and block until every one is done.
threads = [threading.Thread(target=copy_key, name="ct%i" % i) for i in range(50)]
for t in threads:
    t.start()
for t in threads:
    t.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment