Skip to content

Instantly share code, notes, and snippets.

@mpurdon
Created August 16, 2023 17:08
Show Gist options
  • Save mpurdon/c940d677be6a73ac57fad3a803f60dac to your computer and use it in GitHub Desktop.
Save mpurdon/c940d677be6a73ac57fad3a803f60dac to your computer and use it in GitHub Desktop.
Convert a DynamoDB backup to Excel
import {S3Client, ListObjectsV2Command, GetObjectCommand} from '@aws-sdk/client-s3';
import {Workbook, Worksheet, Border} from 'exceljs';
import {createGunzip} from 'zlib';
import {Readable} from 'stream';
// Configuring the AWS environment
const s3Client = new S3Client({region: 'us-east-1'});
async function listJsonFiles(bucketName: string, prefix: string): Promise<string[]> {
console.log({
message: 'listing json files',
bucketName,
prefix
})
const command = new ListObjectsV2Command({Bucket: bucketName, Prefix: prefix});
const response = await s3Client.send(command);
return response.Contents!.map(item => item.Key!).filter(key => key.endsWith('.json.gz'));
}
/**
* Renames attributes in a JSON object based on the provided mapping.
*
* @param jsonObject The JSON object to be transformed.
* @param attributeMapping A mapping of old attribute names to new attribute names.
* @return A new JSON object with renamed attributes.
*/
function renameAttributes(jsonObject: any, attributeMapping: { [oldAttr: string]: string }): any {
for (const oldAttr in attributeMapping) {
if (jsonObject.Item && jsonObject.Item[oldAttr]) {
jsonObject.Item[attributeMapping[oldAttr]] = jsonObject.Item[oldAttr];
delete jsonObject.Item[oldAttr];
}
}
return jsonObject;
}
/**
* Reorders the attributes of a JSON object based on the specified array of keys.
*
* @param jsonObject The JSON object to be reordered.
* @param orderedKeys An array of keys indicating the order in which they should appear.
* @return A new JSON object with reordered attributes.
*/
function reorderJsonObject(jsonObject: any, orderedKeys: string[]): any {
if (!jsonObject.Item) return jsonObject;
const reordered: { [key: string]: any } = {};
// First, add the keys from the orderedKeys array
for (const key of orderedKeys) {
if (jsonObject.Item[key]) {
reordered[key] = jsonObject.Item[key];
}
}
// Next, add any remaining keys from the original object
for (const key in jsonObject.Item) {
if (!reordered[key]) {
reordered[key] = jsonObject.Item[key];
}
}
return {Item: reordered};
}
async function fetchBackupFromS3(bucketName: string, key: string): Promise<any[]> {
const command = new GetObjectCommand({Bucket: bucketName, Key: key});
const response = await s3Client.send(command);
// Attribute renaming mapping
const attributeMapping = {
"pk": "templateID"
};
const orderedKeys = [
"templateID",
"name",
'createdBy'
];
if (response.Body) {
const readableStream = response.Body as Readable; // Type assertion
return new Promise((resolve, reject) => {
let data = '';
const gunzip = createGunzip();
readableStream.pipe(gunzip);
gunzip.on('data', chunk => data += chunk);
gunzip.on('end', () => {
// Split by newlines and filter out any empty strings
const lines = data.split('\n').filter(Boolean);
// Parse each line as JSON, and then rename attributes
const jsonObjects = lines.map(line => {
let jsonObject = JSON.parse(line);
jsonObject = renameAttributes(jsonObject, attributeMapping);
jsonObject = reorderJsonObject(jsonObject, orderedKeys)
return jsonObject
});
resolve(jsonObjects);
});
gunzip.on('error', reject);
});
} else {
throw new Error('The Body of the response is undefined.');
}
}
/**
* Converts a DynamoDB Item into a JSON object
*
* @param item The Item to convert
*/
function transformDynamoItem(item: any): any {
const transformed: { [key: string]: any } = {};
for (const key in item) {
for (const type in item[key]) {
transformed[key] = item[key][type];
}
}
return transformed;
}
/**
* Adjusts the style of the header row to be more nicer
*
* @param worksheet The worksheet to style
*/
function styleHeaderRow(worksheet: Worksheet) {
const headerRow = worksheet.getRow(1);
headerRow.eachCell((cell) => {
// Font
cell.font = {
bold: true,
color: {argb: 'FFFFFF'}, // White text
size: 14,
};
// Fill
cell.fill = {
type: 'pattern',
pattern: 'solid',
fgColor: {argb: 'FF4472C4'}, // Blue background
};
// Border
const border: Border = {
style: 'thin',
color: {argb: 'FF000000'}, // Black border
};
cell.border = {
top: border,
left: border,
bottom: border,
right: border,
};
});
}
/**
* Adjusts the column widths for certain columns to match the content length
*
* @param worksheet The Worksheet to modify
* @param data The data to use for sizing
* @param columnNames The columns we care about
*/
function adjustColumnWidths(worksheet: Worksheet, data: any[], columnNames: string[]) {
for (const columnName of columnNames) {
let maxContentLength = columnName.length; // At least as wide as the header
for (const item of data) {
if (item[columnName]) {
maxContentLength = Math.max(maxContentLength, item[columnName].length);
}
}
// Find the matching column in the worksheet by header and set its width
const column = worksheet.getColumn(columnName);
column.width = maxContentLength;
}
}
/**
* Convert the given data into an Excel worksheet
*
* @param data The data to convert
* @param outputPath The name of the excel file to write to
*/
async function convertToExcel(data: any[], outputPath: string) {
const workbook = new Workbook();
const worksheet = workbook.addWorksheet('DynamoDB Data');
const headers = Object.keys(data[0]);
worksheet.columns = headers.map(header => ({header, key: header}));
worksheet.addRows(data);
// Style the header
styleHeaderRow(worksheet);
// Adjust column widths for specific columns based on the first couple of rows
adjustColumnWidths(worksheet, data.slice(0, 2), ['templateID', 'name', 'createdBy']);
await workbook.xlsx.writeFile(outputPath);
console.log(`Excel file written to ${outputPath}`);
}
(async () => {
const bucketName = '<BACKUP_BUCKET>';
const prefix = 'AWSDynamoDB/<BACKUP_ID>/data/';
const outputFile = 'output.xlsx';
let allData: any[] = [];
try {
const allKeys = await listJsonFiles(bucketName, prefix);
for (const key of allKeys) {
const backupData = await fetchBackupFromS3(bucketName, key);
for (const item of backupData) {
allData.push(transformDynamoItem(item.Item));
}
}
await convertToExcel(allData, outputFile);
} catch (err) {
console.error('Error:', err);
}
})();
@mpurdon
Copy link
Author

mpurdon commented Aug 16, 2023

Supports the following features:

  • renaming Item attributes such as the PK column to ID
  • allows you to specify what columns should appear first, the rest are in the order they were exported
  • allows you to specify what columns should be the width of their content (eg ids, email, etc)

Install it with:
npm install ts-node typescript @types/node @aws-sdk/client-s3 exceljs

Run it with:
AWS_PROFILE=<your_profile> npx ts-node index.ts

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment