Last active
August 3, 2021 06:47
-
-
Save josep11/c39bf4f72a3e643a846c31c630ed257b to your computer and use it in GitHub Desktop.
Upload json folder Dataset to DynamoDB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ------------ NodeJS runtime ---------------
// Add aws-sdk to package.json as a dependency.
// Example:
// {
//   "dependencies": {
//     "aws-sdk": "^2.0.9"
//   }
// }
// Create your credentials file at ~/.aws/credentials (C:\Users\USER_NAME\.aws\credentials for Windows users).
// The file should have the following format:
// [default]
// aws_access_key_id = YOUR_ACCESS_KEY_ID
// aws_secret_access_key = YOUR_SECRET_ACCESS_KEY
const AWS = require('aws-sdk'); | |
// Region passed to createDynamoDbClient by exampleTestFunctions.
const region = 'eu-west-3';
// Table every scan/put in this module targets. Set the DYNAMODB_TABLE_NAME
// environment variable to override it; otherwise the original default is used.
const TableName = process.env.DYNAMODB_TABLE_NAME || 'infofeina';
/**
 * Smoke test: creates a client for the module-level `region`, builds the
 * default scan input and runs a Scan with it.
 *
 * @returns {Promise<Array|undefined>} whatever executeScan resolved with, or
 *   undefined when the scan threw
 */
async function exampleTestFunctions() {
  const { dynamoDbClient } = createDynamoDbClient(region);
  const scanInput = createScanInput();
  try {
    const items = await executeScan(dynamoDbClient, scanInput);
    console.info('Scan API call has been executed.');
    return items;
  } catch (error) {
    // Include the error itself so the failure can actually be diagnosed.
    console.error('error during scan function', error);
    return undefined;
  }
}
/**
 * Configures the global AWS SDK settings and builds the DynamoDB clients.
 *
 * By default the clients target the real service in `regionName`. To use
 * DynamoDB Local instead, set the DYNAMODB_ENDPOINT environment variable
 * (e.g. http://localhost:8000); the local configuration with placeholder
 * credentials is then applied and `regionName` is not used. Previously the
 * local configuration was always applied, which silently discarded
 * `regionName`.
 *
 * @param {string} [regionName='eu-west-3'] AWS region to connect to
 * @returns {{docClient: object, dynamoDbClient: object}} the DocumentClient
 *   and the low-level DynamoDB client it wraps
 */
function createDynamoDbClient(regionName = 'eu-west-3') {
  const localEndpoint = process.env.DYNAMODB_ENDPOINT;
  if (localEndpoint) {
    AWS.config.update({
      region: 'localhost',
      endpoint: localEndpoint,
      accessKeyId: 'access_key_id',
      secretAccessKey: 'secret_access_key',
    });
  } else {
    AWS.config.update({ region: regionName });
  }

  const dynamoDbClient = new AWS.DynamoDB();
  // Reuse the low-level client so both clients share one configuration.
  const docClient = new AWS.DynamoDB.DocumentClient({ service: dynamoDbClient });
  return {
    docClient,
    dynamoDbClient,
  };
}
/**
 * Builds the parameter object for a Scan call.
 *
 * @param {string} [tableName=TableName] table to scan; defaults to the
 *   module-level table name
 * @returns {{TableName: string, ConsistentRead: boolean}} scan parameters
 *   with ConsistentRead set to false
 */
function createScanInput(tableName = TableName) {
  return {
    TableName: tableName,
    ConsistentRead: false,
  };
}
/**
 * Runs a Scan and keeps requesting pages until the response no longer
 * carries a LastEvaluatedKey, so the result is not limited to the first page.
 *
 * The caller's `scanInput` is never mutated; each follow-up request uses a
 * copy with ExclusiveStartKey set. Note that if `scanInput` contains a
 * `Limit`, it applies per page, not to the total.
 *
 * @param {*} dynamoDbClient client exposing `scan(params).promise()`
 * @param {*} scanInput parameters for the Scan call
 * @returns {Promise<Array>} items from all pages, in the order received
 * @throws rethrows the error from the Scan call after passing it to
 *   handleScanError (previously the error was swallowed and undefined returned)
 */
async function executeScan(dynamoDbClient, scanInput) {
  const items = [];
  let exclusiveStartKey;
  try {
    do {
      const pageInput = exclusiveStartKey
        ? Object.assign({}, scanInput, { ExclusiveStartKey: exclusiveStartKey })
        : scanInput;
      const scanOutput = await dynamoDbClient.scan(pageInput).promise();
      for (const item of scanOutput.Items || []) {
        items.push(item);
      }
      exclusiveStartKey = scanOutput.LastEvaluatedKey;
    } while (exclusiveStartKey);
  } catch (err) {
    handleScanError(err);
    throw err;
  }
  console.info('Scan successful.');
  console.log(`Num of items: ${items.length}`);
  return items;
}
/**
 * Logs an error raised during Scan execution. Use the recommendations in the
 * logged messages to add error handling specific to your application.
 *
 * Errors without a `code` are logged directly; Error instances are reported
 * through their `message`, because JSON.stringify on an Error yields "{}"
 * (its fields are not enumerable). Errors with a `code` are delegated to
 * handleCommonErrors, as Scan has no API-specific error codes to handle.
 *
 * @param {*} err the caught error; may be empty
 */
function handleScanError(err) {
  if (!err) {
    console.error('Encountered error object was empty');
    return;
  }
  if (!err.code) {
    const details = err instanceof Error ? err.message : JSON.stringify(err);
    console.error(`An exception occurred, investigate and configure retry strategy. Error: ${details}`);
    return;
  }
  handleCommonErrors(err);
}
/**
 * Writes a console.error line with advice for the given DynamoDB error code,
 * followed by the error's message. Codes that are not listed get a generic
 * "investigate" message.
 *
 * @param {*} err error object providing `code` and `message`
 */
function handleCommonErrors(err) {
  const adviceByCode = new Map([
    ['InternalServerError',
      'Internal Server Error, generally safe to retry with exponential back-off. '],
    ['ProvisionedThroughputExceededException',
      `Request rate is too high. If you're using a custom retry strategy make sure to retry with exponential back-off. `
      + 'Otherwise consider reducing frequency of requests or increasing provisioned capacity for your table or secondary index. '],
    ['ResourceNotFoundException',
      'One of the tables was not found, verify table exists before retrying. '],
    ['ServiceUnavailable',
      'Had trouble reaching DynamoDB. generally safe to retry with exponential back-off. '],
    ['ThrottlingException',
      'Request denied due to throttling, generally safe to retry with exponential back-off. '],
    ['UnrecognizedClientException',
      'The request signature is incorrect most likely due to an invalid AWS access key ID or secret key, fix before retrying. '],
    ['ValidationException',
      'The input fails to satisfy the constraints specified by DynamoDB, fix input before retrying. '],
    ['RequestLimitExceeded',
      'Throughput exceeds the current throughput limit for your account, increase account level throughput before retrying. '],
  ]);
  const genericAdvice = 'An exception occurred, investigate and configure retry strategy. ';

  // A Map lookup only matches the listed codes, so anything else falls back to the generic text.
  const advice = adviceByCode.has(err.code) ? adviceByCode.get(err.code) : genericAdvice;
  console.error(`${advice}Error: ${err.message}`);
}
/**
 * Writes one item to the table through the DocumentClient.
 *
 * A failed request rejects the `.promise()` call, so failures are handled
 * with try/catch: the error is logged and rethrown. (The previous check of
 * `res.$response.error` after a successful await could never observe one.)
 *
 * @param {*} docClient client exposing `put(params).promise()`
 * @param {object} item item to store; its `pk` is used in the log line
 * @param {string} [tableName=TableName] destination table; defaults to the
 *   module-level table name
 * @returns {Promise<object>} the same `item` that was passed in
 * @throws rethrows the error from the put request
 */
async function putItem(docClient, item, tableName = TableName) {
  try {
    await docClient.put({
      TableName: tableName,
      // ReturnValues may be added here: NONE | ALL_OLD | UPDATED_OLD | ALL_NEW | UPDATED_NEW
      Item: item,
    }).promise();
  } catch (err) {
    console.error('error:', err);
    throw err;
  }
  console.log(`written item "${item.pk}"`);
  return item;
}
// Public API of this module.
module.exports = { createDynamoDbClient, putItem, exampleTestFunctions };
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const { | |
createDynamoDbClient, | |
putItem, | |
// exampleTestFunctions | |
} = require('./Dynamo'); | |
const path = require('path'); | |
const fs = require('fs'); | |
const moment = require('moment'); | |
// Dataset folder, located relative to the directory of the executing file.
const directoryPath = path.join(__dirname, 'logs', 'dataset');
/**
 * Maps a scraped job-ad record onto the table's key schema.
 *
 * Example input:
 * {
 *   "id": "148713",
 *   "dadesFullText": ...,
 *   "teletreball": false,
 *   "date": "2021-08-03T00:00:00.000Z",
 *   "url": "http://www.example.com/fitxa.php?idoferta=148713"
 * }
 *
 * The date is formatted in UTC. Formatting in local time would turn a
 * midnight-UTC timestamp into the previous day on machines west of UTC,
 * making the sort key depend on where the script runs.
 *
 * @param {object} item record with `id`, `date`, `teletreball`, `url` and
 *   `dadesFullText`
 * @returns {{pk: string, sk: string, teletreball: *, url: *, dadesFullText: *}}
 *   item with pk "AD#<id>" and sk "#DATE#<YYYY-MM-DD>"
 */
function transformItemToDynamoDBSchema(item) {
  const { id, date, teletreball, url, dadesFullText } = item;
  const DB_DATEFORMAT = 'YYYY-MM-DD';
  const dateF = moment.utc(date).format(DB_DATEFORMAT);
  return {
    pk: `AD#${id}`, // AD#551234
    sk: `#DATE#${dateF}`, // #DATE#2021-07-31
    teletreball,
    url,
    dadesFullText,
  };
}
/**
 * Reads every `*.json` file in a directory, converts each one with
 * transformItemToDynamoDBSchema and stores it with putItem, one at a time.
 *
 * Files whose name does not end in ".json" are skipped, so backups such as
 * "x.json.bak" are no longer picked up. A file that is not valid JSON is
 * reported and skipped. Any other failure (unreadable directory or file,
 * failed write) is logged to stderr and stops the run; it is not rethrown.
 *
 * @param {*} docClient client handed through to putItem
 * @param {string} [dir=directoryPath] directory to read; defaults to the
 *   module-level dataset directory
 * @returns {Promise<void>}
 */
async function parseDirAndSaveToDynamo(docClient, dir = directoryPath) {
  try {
    const files = await fs.promises.readdir(dir);
    for (const file of files) {
      if (!file.endsWith('.json')) {
        continue;
      }
      const content = await fs.promises.readFile(path.resolve(dir, file), 'utf8');
      let rawItem;
      try {
        rawItem = JSON.parse(content);
      } catch (error) {
        console.error('error parsing json in file: ', file);
        continue;
      }
      const dbItem = transformItemToDynamoDBSchema(rawItem);
      await putItem(docClient, dbItem);
    }
  } catch (err) {
    console.error(err);
  }
}
// Entry point: create the clients and upload the dataset folder.
// The trailing catch keeps a failure (e.g. while creating the client) from
// becoming an unhandled rejection and makes the process exit non-zero.
(async () => {
  // await exampleTestFunctions();
  const { docClient } = createDynamoDbClient();
  await parseDirAndSaveToDynamo(docClient);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment