Skip to content

Instantly share code, notes, and snippets.

@josep11
Last active August 3, 2021 06:47
Show Gist options
  • Save josep11/c39bf4f72a3e643a846c31c630ed257b to your computer and use it in GitHub Desktop.
Save josep11/c39bf4f72a3e643a846c31c630ed257b to your computer and use it in GitHub Desktop.
Upload json folder Dataset to DynamoDB
// ------------ NodeJS runtime ---------------
// Add aws-sdk in package.json as a dependency
// Example:
// {
//   "dependencies": {
//     "aws-sdk": "^2.0.9"
//   }
// }
// Create your credentials file at ~/.aws/credentials (C:\Users\USER_NAME\.aws\credentials for Windows users)
// Format of the above file should be:
// [default]
// aws_access_key_id = YOUR_ACCESS_KEY_ID
// aws_secret_access_key = YOUR_SECRET_ACCESS_KEY
const AWS = require('aws-sdk');
// Region passed to createDynamoDbClient by exampleTestFunctions.
const region = 'eu-west-3';
// Table targeted by every scan/put in this module. It can be overridden with the
// DYNAMODB_TABLE_NAME environment variable; an unset or empty variable falls back
// to the original hard-coded table name.
const TableName = process.env.DYNAMODB_TABLE_NAME || 'infofeina';
/**
 * Smoke-test helper: builds the clients, issues a Scan against the configured
 * table and logs the outcome.
 *
 * @returns {Promise<Array|undefined>} whatever executeScan resolved with, or
 *   undefined when the scan threw.
 */
async function exampleTestFunctions() {
  // Only the low-level client is needed for a scan.
  const { dynamoDbClient } = createDynamoDbClient(region);
  const scanInput = createScanInput();
  try {
    const items = await executeScan(dynamoDbClient, scanInput);
    console.info('Scan API call has been executed.');
    return items;
  } catch (error) {
    // Attach the cause to the log line so a failure can actually be diagnosed.
    console.error('error during scan function', error);
    return undefined;
  }
}
/**
 * Configures the global AWS SDK settings and builds a low-level DynamoDB client
 * plus a DocumentClient that wraps it.
 *
 * Historically the DynamoDB Local settings were applied unconditionally after the
 * region, so `regionName` never took effect. That default is kept for
 * compatibility, but the target is now selectable.
 *
 * @param {string} [regionName='eu-west-3'] region used when `useLocal` is false.
 * @param {object} [options]
 * @param {boolean} [options.useLocal=true] when true, target DynamoDB Local with
 *   placeholder credentials; when false, only the region is configured.
 * @param {string} [options.localEndpoint='http://localhost:8000'] endpoint used
 *   when `useLocal` is true.
 * @returns {{docClient: object, dynamoDbClient: object}} the two clients.
 */
function createDynamoDbClient(
  regionName = 'eu-west-3',
  { useLocal = true, localEndpoint = 'http://localhost:8000' } = {}
) {
  if (useLocal) {
    AWS.config.update({
      region: 'localhost',
      endpoint: localEndpoint,
      accessKeyId: 'access_key_id',
      secretAccessKey: 'secret_access_key',
    });
  } else {
    AWS.config.update({ region: regionName });
  }
  const dynamoDbClient = new AWS.DynamoDB();
  // The DocumentClient reuses the low-level client so both share one configuration.
  const docClient = new AWS.DynamoDB.DocumentClient({ service: dynamoDbClient });
  return {
    docClient,
    dynamoDbClient,
  };
}
/**
 * Builds the parameter object for a Scan over the module's table.
 *
 * @returns {{TableName: string, ConsistentRead: boolean}} scan parameters with
 *   ConsistentRead switched off.
 */
function createScanInput() {
  const scanParams = { TableName };
  scanParams.ConsistentRead = false;
  return scanParams;
}
/**
 * Runs a Scan and collects the items of every result page.
 *
 * A single Scan call may return only part of the table together with a
 * LastEvaluatedKey; the scan is repeated from that key until none is returned.
 * When the caller supplies `Limit`, only one page is requested so the cap keeps
 * its original meaning.
 *
 * @param {*} dynamoDbClient client exposing `scan(params).promise()`
 * @param {*} scanInput Scan parameters (see createScanInput)
 * @returns {Promise<Array>} all items returned by the scan
 * @throws rethrows the scan error after it has been logged by handleScanError
 */
async function executeScan(dynamoDbClient, scanInput) {
  try {
    const singlePage = Boolean(scanInput) && scanInput.Limit !== undefined;
    let items = [];
    let pageInput = scanInput;
    let lastKey;
    do {
      const scanOutput = await dynamoDbClient.scan(pageInput).promise();
      items = items.concat(scanOutput.Items || []);
      lastKey = scanOutput.LastEvaluatedKey;
      // Next page (if any) resumes right after the last key evaluated so far.
      pageInput = Object.assign({}, scanInput, { ExclusiveStartKey: lastKey });
    } while (lastKey && !singlePage);
    console.info('Scan successful.');
    console.log(`Num of items: ${items.length}`);
    return items;
  } catch (err) {
    handleScanError(err);
    // Propagate so callers never mistake a failed scan for an empty result.
    throw err;
  }
}
/**
 * Logs an error raised while executing a Scan.
 *
 * Errors carrying a `code` are delegated to handleCommonErrors; anything else is
 * logged with as much detail as can be extracted from it.
 *
 * @param {*} err the value caught around the scan call
 */
function handleScanError(err) {
  if (!err) {
    console.error('Encountered error object was empty');
    return;
  }
  if (!err.code) {
    // JSON.stringify on an Error yields "{}" because its fields are not
    // enumerable, so Error instances are described by name and message instead.
    const detail = err instanceof Error
      ? `${err.name}: ${err.message}`
      : JSON.stringify(err);
    console.error(`An exception occurred, investigate and configure retry strategy. Error: ${detail}`);
    return;
  }
  // There are no API-specific errors to handle for Scan; common DynamoDB API errors are handled below.
  handleCommonErrors(err);
}
/**
 * Logs a remediation hint for an error based on its `code`, followed by the
 * error's message. Codes that are not listed get a generic hint.
 *
 * @param {{code: *, message: *}} err error carrying a `code` and a `message`
 */
function handleCommonErrors(err) {
  // Lookup table of hint text per error code; a Map keeps matching strict, like a switch.
  const adviceByCode = new Map([
    ['InternalServerError',
      'Internal Server Error, generally safe to retry with exponential back-off.'],
    ['ProvisionedThroughputExceededException',
      "Request rate is too high. If you're using a custom retry strategy make sure to retry with exponential back-off. "
      + 'Otherwise consider reducing frequency of requests or increasing provisioned capacity for your table or secondary index.'],
    ['ResourceNotFoundException',
      'One of the tables was not found, verify table exists before retrying.'],
    ['ServiceUnavailable',
      'Had trouble reaching DynamoDB. generally safe to retry with exponential back-off.'],
    ['ThrottlingException',
      'Request denied due to throttling, generally safe to retry with exponential back-off.'],
    ['UnrecognizedClientException',
      'The request signature is incorrect most likely due to an invalid AWS access key ID or secret key, fix before retrying.'],
    ['ValidationException',
      'The input fails to satisfy the constraints specified by DynamoDB, fix input before retrying.'],
    ['RequestLimitExceeded',
      'Throughput exceeds the current throughput limit for your account, increase account level throughput before retrying.'],
  ]);
  const fallbackAdvice = 'An exception occurred, investigate and configure retry strategy.';

  const code = err.code;
  const advice = adviceByCode.has(code) ? adviceByCode.get(code) : fallbackAdvice;
  console.error(`${advice} Error: ${err.message}`);
}
/**
 * Writes one item to the module's table through the DocumentClient.
 *
 * A rejected `put` propagates to the caller. If the call resolves but the
 * response carries an error, that error is logged and the success line is
 * skipped.
 *
 * @param {*} docClient client exposing `put(params).promise()`
 * @param {object} item item to store; `item.pk` is used in the success log line
 * @returns {Promise<object>} the same `item` that was passed in
 */
async function putItem(docClient, item) {
  const res = await docClient.put({
    TableName,
    // ReturnValues may be added here: NONE | ALL_OLD | UPDATED_OLD | ALL_NEW | UPDATED_NEW
    Item: item,
  }).promise();
  // Guarded access: do not crash when the resolved value carries no $response.
  const responseError = res && res.$response && res.$response.error;
  if (responseError) {
    console.error('error:', responseError);
  } else {
    console.log(`written item "${item.pk}"`);
  }
  return item;
}
// Public surface of this module: client factory, single-item writer and the scan smoke test.
module.exports = { createDynamoDbClient, putItem, exampleTestFunctions };
const {
createDynamoDbClient,
putItem,
// exampleTestFunctions
} = require('./Dynamo');
const path = require('path');
const fs = require('fs');
const moment = require('moment');
// Folder that holds the JSON dataset, resolved relative to this script's own directory.
const directoryPath = path.join(__dirname, 'logs', 'dataset');
/**
 * Maps a scraped dataset record onto the table's key schema.
 *
 * Example input:
 * {
 *   "id": "148713",
 *   "dadesFullText": ...,
 *   "teletreball": false,
 *   "date": "2021-08-03T00:00:00.000Z",
 *   "url": "http://www.example.com/fitxa.php?idoferta=148713"
 * }
 *
 * @param {object} item record with `id`, `date`, `teletreball`, `url` and `dadesFullText`
 * @returns {{pk: string, sk: string, teletreball: *, url: *, dadesFullText: *}}
 *   item keyed by `AD#<id>` / `#DATE#<YYYY-MM-DD>`
 */
function transformItemToDynamoDBSchema(item) {
  const { teletreball, url, dadesFullText } = item;
  const DB_DATEFORMAT = 'YYYY-MM-DD';
  // Format in UTC: the dataset stores UTC timestamps, and formatting in the
  // machine's local zone would shift a midnight-UTC date to the previous day
  // on hosts west of UTC.
  const dateF = moment.utc(item.date).format(DB_DATEFORMAT);
  return {
    pk: `AD#${item.id}`, // AD#551234
    sk: `#DATE#${dateF}`, // #DATE#2021-07-31
    teletreball,
    url,
    dadesFullText,
  };
}
/**
 * Reads every `*.json` file in `directoryPath`, converts it with
 * transformItemToDynamoDBSchema and stores it with putItem, one file at a time.
 *
 * Files whose content is not valid JSON are reported and skipped. Any other
 * failure (unreadable directory/file, failed write) stops the run and is logged;
 * the function itself does not reject.
 *
 * @param {*} docClient client handed through to putItem
 * @returns {Promise<void>}
 */
async function parseDirAndSaveToDynamo(docClient) {
  try {
    const files = fs.readdirSync(directoryPath);
    for (const file of files) {
      // Match on the real extension so names like "x.json.bak" are not picked up.
      if (path.extname(file) !== '.json') {
        continue;
      }
      const content = fs.readFileSync(path.resolve(directoryPath, file), 'utf8');
      let parsed;
      try {
        parsed = JSON.parse(content);
      } catch (error) {
        console.error('error parsing json in file: ', file, error.message);
        continue;
      }
      const dbItem = transformItemToDynamoDBSchema(parsed);
      await putItem(docClient, dbItem);
    }
  } catch (err) {
    console.error(err);
  }
}
// Script entry point: build the clients and upload the dataset folder.
// The trailing catch keeps an unexpected failure from becoming an unhandled
// rejection and marks the process as failed.
(async () => {
  const { docClient } = createDynamoDbClient();
  await parseDirAndSaveToDynamo(docClient);
})().catch((err) => {
  console.error('upload failed:', err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment