Skip to content

Instantly share code, notes, and snippets.

@fabien7337
Last active August 29, 2015 13:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fabien7337/10027163 to your computer and use it in GitHub Desktop.
Save fabien7337/10027163 to your computer and use it in GitHub Desktop.
exportDynamoDBTableToS3
--####################################################################
--# Export schema-less table from DDB to S3
--#
--# Params:
--# DYNAMODB_INPUT_TABLE - name of input table
--# S3_OUTPUT_BUCKET - output bucket s3 path
--# DYNAMODB_READ_PERCENT - fraction of the table's provisioned read capacity (RCU) to use, expressed as a ratio rather than 0-100 (e.g. 1.0 = 100%)
--# DYNAMODB_ENDPOINT - dynamodb service endpoint to use
--####################################################################
-- Session settings for the DynamoDB connector, taken from the script parameters.
SET dynamodb.endpoint=${DYNAMODB_ENDPOINT};
SET dynamodb.throughput.read.percent = ${DYNAMODB_READ_PERCENT};

-- Drop table definitions left over from a previous run.
-- IF EXISTS makes the script safely re-runnable (including the very first run)
-- without relying on the hive.exec.drop.ignorenonexistent setting.
-- As declared below, both tables are EXTERNAL, so dropping them is expected to
-- remove only the Hive metadata, not the DynamoDB table or the S3 objects.
DROP TABLE IF EXISTS dynamodb_table;
DROP TABLE IF EXISTS s3_table;

-- Map the DynamoDB table into Hive.
-- The table is schema-less from Hive's point of view: each row is exposed as a
-- single map<string,string> column named item.
CREATE EXTERNAL TABLE dynamodb_table (item map<string,string>)
STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler'
TBLPROPERTIES ("dynamodb.table.name" = "${DYNAMODB_INPUT_TABLE}");

-- Declare the export target: a delimited text table rooted at the S3 output path,
-- one row per line, fields separated by tabs.
CREATE EXTERNAL TABLE s3_table (item map<string, string>)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
LOCATION '${S3_OUTPUT_BUCKET}';

-- Copy every item from DynamoDB into the S3 table.
-- OVERWRITE replaces whatever the S3 table previously contained.
-- The column is named explicitly so the statement keeps working as intended
-- if either table definition gains columns later.
INSERT OVERWRITE TABLE s3_table
SELECT item
FROM dynamodb_table;
@fabien7337
Copy link
Author

s3://elasticmapreduce/libs/hive/hive-script --run-hive-script --hive-versions latest --args -f s3://elasticmapreduce/libs/hive/dynamodb/exportDynamoDBTableToS3 -d DYNAMODB_INPUT_TABLE={TABLE_NAME} -d S3_OUTPUT_BUCKET={OUTPUT} -d DYNAMODB_READ_PERCENT=1.0 -d DYNAMODB_ENDPOINT=dynamodb.us-east-1.amazonaws.com

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment