Created March 26, 2014 00:21
Duplicate a table in AWS DynamoDB (for renaming or manual fix)
from boto.dynamodb2.exceptions import ValidationException
from boto.dynamodb2.fields import HashKey, RangeKey
from boto.dynamodb2.layer1 import DynamoDBConnection
from boto.dynamodb2.table import Table
from boto.exception import JSONResponseError
from time import sleep
import sys
if len(sys.argv) != 3:
print 'Usage: %s <source_table_name> <destination_table_name>' % sys.argv[0]
src_table = sys.argv[1]
dst_table = sys.argv[2]
ddbc = DynamoDBConnection()
# 1. Read and copy the target table to be copied
table_struct = None
logs = Table(src_table)
table_struct = logs.describe()
except JSONResponseError:
print "%s not existing" % src_table
print '*** Reading key schema from %s table' % src_table
src = ddbc.describe_table(src_table)['Table']
hash_key = ''
range_key = ''
for schema in src['KeySchema']:
attr_name = schema['AttributeName']
key_type = schema['KeyType']
if key_type == 'HASH':
hash_key = attr_name
elif key_type == 'RANGE':
range_key = attr_name
# 2. Create the new table
table_struct = None
new_logs = Table(dst_table, schema=[HashKey(hash_key),RangeKey(range_key),])
table_struct = new_logs.describe()
print 'Table %s already exists' % dst_table
except JSONResponseError:
new_logs = Table.create(dst_table, schema=[HashKey('trial'),RangeKey('parallel'),])
print '*** Waiting for the new table %s becomes active' % dst_table
while ddbc.describe_table(dst_table)['Table']['TableStatus'] != 'ACTIVE':
# 3. Add the items
for item in logs.scan():
new_item = {}
new_item[hash_key] = item[hash_key]
if range_key != '':
new_item[range_key] = item[range_key]
for f in item.keys():
if f in [hash_key, range_key]:
new_item[f] = item[f]
new_logs.put_item(new_item, overwrite=True)
except ValidationException:
print dst_table, new_item['trial'], new_item['parallel']
except JSONResponseError:
print ddbc.describe_table(dst_table)['Table']['TableStatus']
Thanks for writing - very helpful.

btw, someone made a repo out of it:

ezeeetm commented May 11, 2016

@jakelodwick @iomz

Given a source table with read throughput x and a destination table with write throughput less than x, this utility will hit throttled writes, which means the throttled items will not be copied to the destination.

The converse of this could also exist: a throttled read resulting in nothing returned, which then never gets written.

Am I observing this correctly, or am I missing something?

If so, I may be able to open a PR with something to fix it. Let me know.

