Skip to content

Instantly share code, notes, and snippets.

@ryanlwh
Last active September 9, 2023 14:13
Show Gist options
  • Save ryanlwh/965c58e204a950dff4d7a0cb03c6e3be to your computer and use it in GitHub Desktop.
Save ryanlwh/965c58e204a950dff4d7a0cb03c6e3be to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Adapted from http://justprogrammng.blogspot.com/2016/08/python-script-to-move-records-from-csv.html
# Script to write csv records into dynamo db table.
# For help on prerequisites and running this script, read this blog.
from __future__ import print_function # Python 2/3 compatibility
from __future__ import division # Python 2/3 compatibility for integer division
import argparse
import boto3
import csv
import time
from decimal import Decimal
# command line arguments


def _positive_int(text):
    """argparse ``type`` callable: parse *text* as an integer that is >= 1.

    --writeRate is used as the divisor of the per-record sleep, so zero and
    negative rates are rejected here as a usage error instead of failing
    part-way through the import.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError('invalid int value: %r' % (text,))
    if value < 1:
        raise argparse.ArgumentTypeError('must be a positive integer, got %r' % (text,))
    return value


# Help texts use argparse's %(default)s placeholder so the documented default
# can never drift away from the real one.
parser = argparse.ArgumentParser(
    description='Write CSV records to dynamo db table. CSV Header must map to dynamo table field names.')
parser.add_argument('csvFile', help='Path to csv file location')
parser.add_argument('table', help='Dynamo db table name')
parser.add_argument('--hasDataType', default=False, required=False, action='store_true',
                    help='Has second row of data type for each field')
parser.add_argument('--delimiter', default=',', nargs='?',
                    help='Delimiter for csv records (default: %(default)s)')
parser.add_argument('--useLocalEndpoint', default=False, required=False, action='store_true',
                    help='Use local endpoint http://localhost:8000')
parser.add_argument('--region', default='ap-northeast-1', nargs='?',
                    help='Dynamo db region name (default: %(default)s)')
parser.add_argument('--writeRate', default=5, type=_positive_int, nargs='?',
                    help='Number of records to write in table per second (default: %(default)s)')
args = parser.parse_args()
print(args)
# dynamodb and table initialization
# Only override the endpoint when targeting a local DynamoDB instance.
# Passing None lets boto3 resolve the endpoint for args.region itself; a
# hand-built "https://dynamodb.<region>.amazonaws.com" URL is wrong for regions
# outside the standard partition (e.g. the China regions use amazonaws.com.cn).
endpointUrl = "http://localhost:8000" if args.useLocalEndpoint else None
dynamodb = boto3.resource('dynamodb', region_name=args.region, endpoint_url=endpointUrl)
table = dynamodb.Table(args.table)
# write records to dynamo db

# Cell spellings (compared case-insensitively) that a 'bool' column stores as False.
_FALSE_STRINGS = frozenset(['false', 'f', 'no', 'n', 'off', '0'])


def _convert_value(raw, data_type):
    """Convert one non-empty CSV cell to the value stored in the item.

    data_type is the matching entry of the optional data type row and is
    compared case-insensitively:
      'int'               -> int
      'bool'              -> False for the spellings in _FALSE_STRINGS, else True
      'float' / 'decimal' -> Decimal
      anything else       -> the text is returned unchanged
    Raises ValueError (int) or decimal.InvalidOperation (Decimal) when the
    text cannot be parsed as the requested type.
    """
    kind = data_type.strip().lower()
    if kind == 'int':
        return int(raw)
    if kind == 'bool':
        # bool(raw) would be True for every non-empty string, including
        # "false" and "0". Any text that is not a known false spelling is
        # still True, so 'true', '1', 'yes', ... convert as they did before.
        return raw.strip().lower() not in _FALSE_STRINGS
    if kind in ('float', 'decimal'):
        return Decimal(raw)
    return raw


def _next_row(reader, description):
    """Return the next row of *reader*, exiting with a clear message at end of file."""
    # The builtin next() works on both Python 2.6+ and Python 3.
    row = next(reader, None)
    if row is None:
        raise SystemExit('{0}: missing {1}'.format(args.csvFile, description))
    return row


# Pause between writes, to accommodate the max write provisioned capacity for
# the table. Computed once, so an unusable --writeRate fails before any record
# has been written.
writeDelay = 1 / args.writeRate

with open(args.csvFile) as csv_file:
    tokens = csv.reader(csv_file, delimiter=args.delimiter)
    # first line in file contains the dynamo db field names
    header = _next_row(tokens, 'header row of field names')
    # optional second line in file contains the dynamo db field data types
    if args.hasDataType:
        headerFormat = _next_row(tokens, 'data type row')
        # Columns without a declared type are kept as plain strings.
        headerFormat = headerFormat + [''] * (len(header) - len(headerFormat))
    else:
        headerFormat = [''] * len(header)
    # rest of file contains new records
    for token in tokens:
        item = {}
        for i, val in enumerate(token):
            # Empty cells are left out of the item altogether. A non-empty
            # cell beyond the last header column raises IndexError.
            if val:
                item[header[i]] = _convert_value(val, headerFormat[i])
        if not item:
            # Blank line or row of empty cells: there is nothing to store.
            continue
        print(item)
        result = table.put_item(Item=item)
        print(result)
        time.sleep(writeDelay)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment