@kaushalye
Created June 25, 2017 07:17
A Lambda function to import data from CSV files in an S3 bucket to Redshift
var pg = require("pg");

/**
 * Creates a table in Redshift and imports data from a CSV file in the S3 bucket.
 * Make sure to specify the environment variables for the connection string, the
 * S3 bucket, and the IAM role that has access to the Redshift cluster.
 */
var pushData = function (context, entityName, schema) {
  const conn = process.env.DB_CON_STR;      // e.g., pg://user:pass@host:port/db_name
  const s3Bucket = process.env.S3_BUCKET;   // e.g., my.bucket.com
  const credsRole = process.env.CREDS_ROLE; // e.g., arn:aws:iam::1234567890:role/myRedshiftRole
  const client = new pg.Client(conn);

  console.log("Reading " + entityName + " from bucket " + s3Bucket + " and pushing to Redshift cluster");

  // Recreate the table, then bulk-load the CSV with Redshift's COPY command.
  // Note: the COPY region is hard-coded to ap-southeast-2; change it to match your bucket's region.
  const dropQry = 'DROP TABLE IF EXISTS ' + entityName + ';';
  const createQry = 'CREATE TABLE IF NOT EXISTS ' + entityName + '(' + schema + ');';
  const copyQry = 'COPY ' + entityName + ' FROM \'s3://' + s3Bucket + '/' + entityName + '.csv\'' +
    ' credentials \'aws_iam_role=' + credsRole + '\'' +
    ' region \'ap-southeast-2\' delimiter \',\' IGNOREHEADER 1 removequotes;';
  const qry = dropQry + createQry + copyQry;
  console.log("Executing query: " + qry);

  // Connect, run the three statements as one batch, and report the outcome to Lambda.
  client.connect(function (err) {
    if (err) {
      console.log(err);
      context.fail(err);
      return;
    }
    client.query(qry, function (error, result) {
      client.end();
      if (error) {
        console.log(error);
        context.fail(error);
        return;
      }
      console.log(result);
      context.succeed("Done");
    });
  });
};
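
/*
 * Illustrative only: with the sample 'Customers' schema passed in below and the
 * example environment values shown in the comments above, the concatenated
 * query would look like:
 *
 *   DROP TABLE IF EXISTS Customers;
 *   CREATE TABLE IF NOT EXISTS Customers(id varchar(255), firstName varchar(255),
 *     lastName varchar(255), createdAt BIGINT, age INTEGER);
 *   COPY Customers FROM 's3://my.bucket.com/Customers.csv'
 *     credentials 'aws_iam_role=arn:aws:iam::1234567890:role/myRedshiftRole'
 *     region 'ap-southeast-2' delimiter ',' IGNOREHEADER 1 removequotes;
 */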
exports.handler = function (event, context) {
  /*
   * Change the CSV file name and the sample schema accordingly.
   */
  pushData(context,
    'Customers',
    'id varchar(255), firstName varchar(255), lastName varchar(255), createdAt BIGINT, age INTEGER');
};
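
For local testing, the handler can be invoked outside Lambda with a stubbed context. The sketch below is not part of the original snippet; it assumes the code above is saved as index.js, and the connection string, bucket, and role ARN are placeholders.

// Minimal local-invocation sketch (assumption: the gist is saved as index.js;
// all three environment values below are placeholders, with 5439 as Redshift's default port).
process.env.DB_CON_STR = "pg://user:pass@host:5439/db_name";
process.env.S3_BUCKET = "my.bucket.com";
process.env.CREDS_ROLE = "arn:aws:iam::1234567890:role/myRedshiftRole";

var handler = require("./index").handler;

// Stub only the pieces of the Lambda context that pushData actually uses.
handler({}, {
  succeed: function (msg) { console.log("succeed:", msg); },
  fail: function (err) { console.error("fail:", err); }
});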