Skip to content

Instantly share code, notes, and snippets.

@thetrevorharmon
Last active December 13, 2023 23:43
Show Gist options
  • Save thetrevorharmon/6b831875cde98e5dc7f7458f6112b061 to your computer and use it in GitHub Desktop.
Save thetrevorharmon/6b831875cde98e5dc7f7458f6112b061 to your computer and use it in GitHub Desktop.
Query JSON with S3 Select in Node.js
// See tth.im/s3json for a full explanation of this code
const AWS = require('aws-sdk');
const S3 = new AWS.S3();
exports.handler = async (_, context) => {
try {
const query = 'SELECT * FROM s3object[*].results[*] r;';
// test query is against data from https://swapi.co/api/planets/?page=2
const bucket = 'test-bucket-for-s3-select-tutorial';
const key = 'planets.json';
const params = {
Bucket: bucket,
Key: key,
ExpressionType: 'SQL',
Expression: query,
InputSerialization: {
JSON: {
Type: 'DOCUMENT',
}
},
OutputSerialization: {
JSON: {
RecordDelimiter: ','
}
}
}
const data = await getDataUsingS3Select(params);
context.succeed(data);
} catch (error) {
context.fail(error);
}
};
const getDataUsingS3Select = async (params) => {
return new Promise((resolve, reject) => {
S3.selectObjectContent(params, (err, data) => {
if (err) { reject(err); }
if (!data) {
reject('Empty data object');
}
// This will be an array of bytes of data, to be converted
// to a buffer
const records = []
// This is a stream of events
data.Payload.on('data', (event) => {
// There are multiple events in the eventStream, but all we
// care about are Records events. If the event is a Records
// event, there is data inside it
if (event.Records) {
records.push(event.Records.Payload);
}
})
.on('error', (err) => {
reject(err);
})
.on('end', () => {
// Convert the array of bytes into a buffer, and then
// convert that to a string
let planetString = Buffer.concat(records).toString('utf8');
// remove any trailing commas
planetString = planetString.replace(/\,$/, '');
// Add into JSON 'array'
planetString = `[${planetString}]`;
try {
const planetData = JSON.parse(planetString);
resolve(planetData);
} catch (e) {
reject(new Error(`Unable to convert S3 data to JSON object. S3 Select Query: ${params.Expression}`));
}
});
});
})
}
@thetrevorharmon
Copy link
Author

@hpl002 this is great! Thanks for your comment!

@lucianosmori
Copy link

lucianosmori commented Jan 21, 2023

  "errorType": "ReferenceError",
  "errorMessage": "require is not defined in ES module scope, you can use import instead",
  "trace": [
    "ReferenceError: require is not defined in ES module scope, you can use import instead",
    "    at file:///var/task/index.mjs:3:13",
    "    at ModuleJob.run (node:internal/modules/esm/module_job:193:25)",
    "    at async Promise.all (index 0)",
    "    at async ESMLoader.import (node:internal/modules/esm/loader:530:24)",
    "    at async _tryAwaitImport (file:///var/runtime/index.mjs:921:16)",
    "    at async _tryRequire (file:///var/runtime/index.mjs:970:86)",
    "    at async _loadUserApp (file:///var/runtime/index.mjs:994:16)",
    "    at async UserFunction.js.module.exports.load (file:///var/runtime/index.mjs:1035:21)",
    "    at async start (file:///var/runtime/index.mjs:1200:23)",
    "    at async file:///var/runtime/index.mjs:1206:1"
  ]
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment