Created
August 16, 2023 17:08
-
-
Save mpurdon/c940d677be6a73ac57fad3a803f60dac to your computer and use it in GitHub Desktop.
Convert a DynamoDB backup to Excel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import {S3Client, ListObjectsV2Command, GetObjectCommand} from '@aws-sdk/client-s3'; | |
import {Workbook, Worksheet, Border} from 'exceljs'; | |
import {createGunzip} from 'zlib'; | |
import {Readable} from 'stream'; | |
// Configuring the AWS environment | |
const s3Client = new S3Client({region: 'us-east-1'}); | |
async function listJsonFiles(bucketName: string, prefix: string): Promise<string[]> { | |
console.log({ | |
message: 'listing json files', | |
bucketName, | |
prefix | |
}) | |
const command = new ListObjectsV2Command({Bucket: bucketName, Prefix: prefix}); | |
const response = await s3Client.send(command); | |
return response.Contents!.map(item => item.Key!).filter(key => key.endsWith('.json.gz')); | |
} | |
/** | |
* Renames attributes in a JSON object based on the provided mapping. | |
* | |
* @param jsonObject The JSON object to be transformed. | |
* @param attributeMapping A mapping of old attribute names to new attribute names. | |
* @return A new JSON object with renamed attributes. | |
*/ | |
function renameAttributes(jsonObject: any, attributeMapping: { [oldAttr: string]: string }): any { | |
for (const oldAttr in attributeMapping) { | |
if (jsonObject.Item && jsonObject.Item[oldAttr]) { | |
jsonObject.Item[attributeMapping[oldAttr]] = jsonObject.Item[oldAttr]; | |
delete jsonObject.Item[oldAttr]; | |
} | |
} | |
return jsonObject; | |
} | |
/** | |
* Reorders the attributes of a JSON object based on the specified array of keys. | |
* | |
* @param jsonObject The JSON object to be reordered. | |
* @param orderedKeys An array of keys indicating the order in which they should appear. | |
* @return A new JSON object with reordered attributes. | |
*/ | |
function reorderJsonObject(jsonObject: any, orderedKeys: string[]): any { | |
if (!jsonObject.Item) return jsonObject; | |
const reordered: { [key: string]: any } = {}; | |
// First, add the keys from the orderedKeys array | |
for (const key of orderedKeys) { | |
if (jsonObject.Item[key]) { | |
reordered[key] = jsonObject.Item[key]; | |
} | |
} | |
// Next, add any remaining keys from the original object | |
for (const key in jsonObject.Item) { | |
if (!reordered[key]) { | |
reordered[key] = jsonObject.Item[key]; | |
} | |
} | |
return {Item: reordered}; | |
} | |
async function fetchBackupFromS3(bucketName: string, key: string): Promise<any[]> { | |
const command = new GetObjectCommand({Bucket: bucketName, Key: key}); | |
const response = await s3Client.send(command); | |
// Attribute renaming mapping | |
const attributeMapping = { | |
"pk": "templateID" | |
}; | |
const orderedKeys = [ | |
"templateID", | |
"name", | |
'createdBy' | |
]; | |
if (response.Body) { | |
const readableStream = response.Body as Readable; // Type assertion | |
return new Promise((resolve, reject) => { | |
let data = ''; | |
const gunzip = createGunzip(); | |
readableStream.pipe(gunzip); | |
gunzip.on('data', chunk => data += chunk); | |
gunzip.on('end', () => { | |
// Split by newlines and filter out any empty strings | |
const lines = data.split('\n').filter(Boolean); | |
// Parse each line as JSON, and then rename attributes | |
const jsonObjects = lines.map(line => { | |
let jsonObject = JSON.parse(line); | |
jsonObject = renameAttributes(jsonObject, attributeMapping); | |
jsonObject = reorderJsonObject(jsonObject, orderedKeys) | |
return jsonObject | |
}); | |
resolve(jsonObjects); | |
}); | |
gunzip.on('error', reject); | |
}); | |
} else { | |
throw new Error('The Body of the response is undefined.'); | |
} | |
} | |
/** | |
* Converts a DynamoDB Item into a JSON object | |
* | |
* @param item The Item to convert | |
*/ | |
function transformDynamoItem(item: any): any { | |
const transformed: { [key: string]: any } = {}; | |
for (const key in item) { | |
for (const type in item[key]) { | |
transformed[key] = item[key][type]; | |
} | |
} | |
return transformed; | |
} | |
/** | |
* Adjusts the style of the header row to be more nicer | |
* | |
* @param worksheet The worksheet to style | |
*/ | |
function styleHeaderRow(worksheet: Worksheet) { | |
const headerRow = worksheet.getRow(1); | |
headerRow.eachCell((cell) => { | |
// Font | |
cell.font = { | |
bold: true, | |
color: {argb: 'FFFFFF'}, // White text | |
size: 14, | |
}; | |
// Fill | |
cell.fill = { | |
type: 'pattern', | |
pattern: 'solid', | |
fgColor: {argb: 'FF4472C4'}, // Blue background | |
}; | |
// Border | |
const border: Border = { | |
style: 'thin', | |
color: {argb: 'FF000000'}, // Black border | |
}; | |
cell.border = { | |
top: border, | |
left: border, | |
bottom: border, | |
right: border, | |
}; | |
}); | |
} | |
/** | |
* Adjusts the column widths for certain columns to match the content length | |
* | |
* @param worksheet The Worksheet to modify | |
* @param data The data to use for sizing | |
* @param columnNames The columns we care about | |
*/ | |
function adjustColumnWidths(worksheet: Worksheet, data: any[], columnNames: string[]) { | |
for (const columnName of columnNames) { | |
let maxContentLength = columnName.length; // At least as wide as the header | |
for (const item of data) { | |
if (item[columnName]) { | |
maxContentLength = Math.max(maxContentLength, item[columnName].length); | |
} | |
} | |
// Find the matching column in the worksheet by header and set its width | |
const column = worksheet.getColumn(columnName); | |
column.width = maxContentLength; | |
} | |
} | |
/** | |
* Convert the given data into an Excel worksheet | |
* | |
* @param data The data to convert | |
* @param outputPath The name of the excel file to write to | |
*/ | |
async function convertToExcel(data: any[], outputPath: string) { | |
const workbook = new Workbook(); | |
const worksheet = workbook.addWorksheet('DynamoDB Data'); | |
const headers = Object.keys(data[0]); | |
worksheet.columns = headers.map(header => ({header, key: header})); | |
worksheet.addRows(data); | |
// Style the header | |
styleHeaderRow(worksheet); | |
// Adjust column widths for specific columns based on the first couple of rows | |
adjustColumnWidths(worksheet, data.slice(0, 2), ['templateID', 'name', 'createdBy']); | |
await workbook.xlsx.writeFile(outputPath); | |
console.log(`Excel file written to ${outputPath}`); | |
} | |
(async () => { | |
const bucketName = '<BACKUP_BUCKET>'; | |
const prefix = 'AWSDynamoDB/<BACKUP_ID>/data/'; | |
const outputFile = 'output.xlsx'; | |
let allData: any[] = []; | |
try { | |
const allKeys = await listJsonFiles(bucketName, prefix); | |
for (const key of allKeys) { | |
const backupData = await fetchBackupFromS3(bucketName, key); | |
for (const item of backupData) { | |
allData.push(transformDynamoItem(item.Item)); | |
} | |
} | |
await convertToExcel(allData, outputFile); | |
} catch (err) { | |
console.error('Error:', err); | |
} | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Supports the following features:
Install it with:
npm install ts-node typescript @types/node @aws-sdk/client-s3 exceljs
Run it with:
AWS_PROFILE=<your_profile> npx ts-node index.ts