Skip to content

Instantly share code, notes, and snippets.

@williamtsoi1
Last active September 1, 2020 07:48
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save williamtsoi1/104531c65852a852399a3dc1096a2dcc to your computer and use it in GitHub Desktop.
Save williamtsoi1/104531c65852a852399a3dc1096a2dcc to your computer and use it in GitHub Desktop.
A Lambda function that automatically takes ownership of any object written into an S3 bucket. Inspired by https://gist.github.com/joech4n/953c1cd6a36698c5d120
// Emitted when this module is loaded, before any invocation of the handler.
console.log('Loading event');

// AWS SDK plus the two service clients the handler uses. Each client is
// pinned to an explicit API version and created once at module scope.
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
const sqs = new aws.SQS({ apiVersion: '2012-11-05' });
exports.handler = function(event, context, callback) {
s3.listBuckets(function(err,data) {
if (err) {
console.log('ERROR: Problem getting list of buckets. This should have something to do with incorrect IAM permissions for the lambda.');
errorMessage = 'ERROR: Error from S3: '+err;
console.log(errorMessage);
context.done('error', errorMessage);
} else {
var canonicalAccountId = data.Owner.ID;
console.log('Received event:');
console.log(JSON.stringify(event, null, ' '));
// TODO: This is only a hack. The delay needs to be removed after Talend jobs fix the ACL issue
console.log('Delaying for 180 seconds...');
setTimeout(function() {
// This converts the SNS message into the same format as from S3 directly
var s3Event = event.Records[0].Sns.Message;
// Get the data from the event
var bucket = s3Event.Records[0].s3.bucket.name;
var key = s3Event.Records[0].s3.object.key;
var getOwnerParams = {
Bucket: bucket, /* required */
Key: decodeURIComponent(key), /* required */
};
// Gets the owner of the S3 Object
s3.getObjectAcl(getOwnerParams, function(err, data) {
if (err) {
console.log('ERROR: Problem getting the ACL. This should have something to do with the object not applying the bucket-owner-full-control ACL.');
errorMessage = 'ERROR: Error from S3: '+err;
console.log(errorMessage);
context.done('error', errorMessage);
} else {
// If the owner is not the correct, then overwrite itself, else do nothing
var objectOwnerId = data.Owner.ID;
if (objectOwnerId != canonicalAccountId) {
console.log("INFO: Object owner not correct. overwriting...");
overwriteS3Object();
} else {
console.log("INFO: Object owner correct. Pushing event into BucketMapper SQS...");
var queueUrl = process.env.BUCKETMAPPER_QUEUE_URL;
var bucketMapperQueueParams = {
QueueUrl: queueUrl,
MessageBody: JSON.stringify(s3Event, null, ' ')
};
console.log('Writing to bucketmapper queue for bucket:' + bucket + ', key: ' + key);
sqs.sendMessage(bucketMapperQueueParams, function (err, data) {
if (err) {
var errorMessage = 'ERROR: Error creating SQS message: for bucket:' + bucket + ', key: ' + key + '. ' + err;
console.log(errorMessage);
callback(errorMessage);
} else {
var successMessage = 'SUCCESS: Created SQS message for bucket:' + bucket + ', key: ' + key;
console.log(successMessage);
callback(null, successMessage);
}
});
}
}
});
function overwriteS3Object() {
// Configure params for CopyObject: http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#copyObject-property
var copyParams = {
Bucket: bucket, /* required */
CopySource: bucket + '/' + key, /* required */
Key: decodeURIComponent(key), /* required; URL-decoded because event URL-encodes the key.
For some reason, CopySource does not require this */
StorageClass: 'STANDARD', /* Must include this to be able to overwrite a file */
ServerSideEncryption: 'AES256' /* server side encryption */
};
s3.copyObject(copyParams,
function(err,data) {
if (err) {
console.log('ERROR: Problem overwriting ' + bucket + '/' +key + '. Make sure they exist and your bucket is in the same region as this function.');
errorMessage = 'ERROR: Error from S3: '+err;
console.log(errorMessage);
context.done('error', errorMessage);
}
else {
console.log('SUCCESS: Overwritten ' + bucket + '/' + key);
context.done(null,'');
}
}
);
}
}, 180000);
}
});
};
@nbrys
Copy link

nbrys commented Mar 20, 2019

Hi, could you point me in the direction why the timeout hack is introduced in this script?

https://gist.github.com/williamtsoi1/104531c65852a852399a3dc1096a2dcc#file-lake-s3-object-take-ownership-js-L19

@riteshchaman
Copy link

+1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment