Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Query JSON with S3 Select in Node.js
// See tth.im/s3json for a full explanation of this code
const AWS = require('aws-sdk');
const S3 = new AWS.S3();
exports.handler = async (_, context) => {
try {
const query = 'SELECT * FROM s3object[*].results[*] r;';
// test query is against data from https://swapi.co/api/planets/?page=2
const bucket = 'test-bucket-for-s3-select-tutorial';
const key = 'planets.json';
const params = {
Bucket: bucket,
Key: key,
ExpressionType: 'SQL',
Expression: query,
InputSerialization: {
JSON: {
Type: 'DOCUMENT',
}
},
OutputSerialization: {
JSON: {
RecordDelimiter: ','
}
}
}
const data = await getDataUsingS3Select(params);
context.succeed(data);
} catch (error) {
context.fail(error);
}
};
const getDataUsingS3Select = async (params) => {
return new Promise((resolve, reject) => {
S3.selectObjectContent(params, (err, data) => {
if (err) { reject(err); }
if (!data) {
reject('Empty data object');
}
// This will be an array of bytes of data, to be converted
// to a buffer
const records = []
// This is a stream of events
data.Payload.on('data', (event) => {
// There are multiple events in the eventStream, but all we
// care about are Records events. If the event is a Records
// event, there is data inside it
if (event.Records) {
records.push(event.Records.Payload);
}
})
.on('error', (err) => {
reject(err);
})
.on('end', () => {
// Convert the array of bytes into a buffer, and then
// convert that to a string
let planetString = Buffer.concat(records).toString('utf8');
// remove any trailing commas
planetString = planetString.replace(/\,$/, '');
// Add into JSON 'array'
planetString = `[${planetString}]`;
try {
const planetData = JSON.parse(planetString);
resolve(planetData);
} catch (e) {
reject(new Error(`Unable to convert S3 data to JSON object. S3 Select Query: ${params.Expression}`));
}
});
});
})
}
@hpl002

This comment has been minimized.

Copy link

@hpl002 hpl002 commented Aug 30, 2020

Thanks for this!

Great starting point for what i needed, but i had a slight issue..

While you check for data at line 44, the remaining code will run, even after reject.

You could of course handle this elsewhere, but if a error handling mechanism is not implemented then this will cause a runtime error.

I fixed this by wrapping it in a else block.

For reference (with my alterations):

const getDataUsingS3Select = async (params) => {
  // 1
  return new Promise((resolve, reject) => {
    s3.selectObjectContent(params, (err, data) => {
      if (err) {
        reject(err);
      }

      if (!data) {
        reject("Empty data object");
      } 
//Wrapping in else. Data.Payload will throw a runtime error if data is null
else {
        // This will be an array of bytes of data, to be converted
        // to a buffer
        const records = [];

        // This is a stream of events
        data.Payload.on("data", (event) => {
          // There are multiple events in the eventStream, but all we
          // care about are Records events. If the event is a Records
          // event, there is data inside it
          if (event.Records) {
            records.push(event.Records.Payload);
          }
        })
          .on("error", (err) => {
            reject(err);
          })
          .on("end", () => {
            // Convert the array of bytes into a buffer, and then
            // convert that to a string
            let planetString = Buffer.concat(records).toString("utf8");

            // 2
            // remove any trailing commas
            planetString = planetString.replace(/\,$/, "");

            // 3
            // Add into JSON 'array'
            planetString = `[${planetString}]`;

            try {
              resolve(planetString);
            } catch (e) {
              reject(
                new Error(
                  `Unable to convert S3 data to JSON object. S3 Select Query: ${params.Expression}`
                )
              );
            }
          });
      }
    });
  });
};
@thetrevorharmon

This comment has been minimized.

Copy link
Owner Author

@thetrevorharmon thetrevorharmon commented Aug 31, 2020

@hpl002 this is great! Thanks for your comment!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.