Last active
September 9, 2023 14:13
-
-
Save ryanlwh/965c58e204a950dff4d7a0cb03c6e3be to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Adapted from http://justprogrammng.blogspot.com/2016/08/python-script-to-move-records-from-csv.html | |
# Script to write csv records into dynamo db table. | |
# For help on prerequisites and running this script, read this blog. | |
from __future__ import print_function # Python 2/3 compatibility | |
from __future__ import division #Python 2/3 compatiblity for integer division | |
import argparse | |
import boto3 | |
import csv | |
import time | |
from decimal import Decimal | |
# command line arguments
# Help strings use argparse's %(default)s placeholder so the documented default
# always matches the real one (the old text advertised '|' and 'us-west-2',
# neither of which was the actual default).
parser = argparse.ArgumentParser(description='Write CSV records to dynamo db table. CSV Header must map to dynamo table field names.')
parser.add_argument('csvFile', help='Path to csv file location')
parser.add_argument('table', help='Dynamo db table name')
parser.add_argument('--hasDataType', default=False, required=False, action='store_true', help='Has second row of data type for each field')
parser.add_argument('--delimiter', default=',', nargs='?', help='Delimiter for csv records (default=%(default)s)')
parser.add_argument('--useLocalEndpoint', default=False, required=False, action='store_true', help='Use local endpoint http://localhost:8000')
parser.add_argument('--region', default='ap-northeast-1', nargs='?', help='Dynamo db region name (default=%(default)s)')
parser.add_argument('--writeRate', default=5, type=int, nargs='?', help='Number of records to write in table per second (default:%(default)s)')
args = parser.parse_args()
# Echo the parsed options so a run can be checked against what was intended.
print(args)
# dynamodb and table initialization
# Only override the endpoint when targeting a local DynamoDB instance. With
# endpoint_url=None boto3 resolves the regional endpoint itself, which also
# works for partitions whose DNS suffix is not "amazonaws.com" (a hand-built
# "https://dynamodb.<region>.amazonaws.com" URL does not).
endpointUrl = "http://localhost:8000" if args.useLocalEndpoint else None
dynamodb = boto3.resource('dynamodb', region_name=args.region, endpoint_url=endpointUrl)
table = dynamodb.Table(args.table)
# write records to dynamo db
def _parse_bool(text):
    """Interpret a CSV cell as a boolean.

    ``bool(text)`` is True for every non-empty string (including "false" and
    "0"), so the cell is matched against explicit spellings instead.

    Raises:
        ValueError: if ``text`` is not a recognised boolean spelling.
    """
    normalized = text.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', '0'):
        return False
    raise ValueError('Cannot interpret {!r} as a boolean'.format(text))


def _convert_value(text, type_name):
    """Convert a non-empty CSV cell according to its declared column type.

    ``type_name`` is matched case-insensitively. 'int' yields an int, 'bool' a
    bool, and 'float'/'decimal' a Decimal; any other type name (including the
    empty string) leaves the cell as a string.

    Raises:
        ValueError: if the cell cannot be parsed as an int or a bool.
        decimal.InvalidOperation: if the cell cannot be parsed as a Decimal.
    """
    kind = type_name.strip().lower()
    if kind == 'int':
        return int(text)
    if kind == 'bool':
        return _parse_bool(text)
    if kind in ('float', 'decimal'):
        return Decimal(text)
    return text


# Validate the rate before touching the table, rather than failing with a
# ZeroDivisionError/TypeError after the first record has already been written.
if not args.writeRate or args.writeRate <= 0:
    raise ValueError('--writeRate must be a positive integer')
# Pause between writes to accommodate the table's provisioned write capacity.
write_interval = 1.0 / args.writeRate

# NOTE: the csv docs recommend newline='' on Python 3; it is omitted here
# because Python 2's open() does not accept that argument.
with open(args.csvFile) as csv_file:
    tokens = csv.reader(csv_file, delimiter=args.delimiter)
    # read first line in file which contains dynamo db field names
    # (the builtin next() works on both Python 2.6+ and Python 3)
    header = next(tokens)
    # read second line in file which contains dynamo db field data types
    if args.hasDataType:
        headerFormat = next(tokens)
        # Columns without a declared type are stored as plain strings.
        headerFormat += [''] * (len(header) - len(headerFormat))
    else:
        headerFormat = [''] * len(header)
    # rest of file contain new records
    for token in tokens:
        if len(token) > len(header):
            raise ValueError(
                'Line {}: found {} values but the header has only {} fields'.format(
                    tokens.line_num, len(token), len(header)))
        item = {}
        for key, type_name, val in zip(header, headerFormat, token):
            # Empty cells are omitted from the item entirely.
            if val:
                item[key] = _convert_value(val, type_name)
        if not item:
            # Blank line or all-empty row: there is nothing to write.
            continue
        print(item)
        result = table.put_item(Item=item)
        print(result)
        time.sleep(write_interval)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment