Skip to content

Instantly share code, notes, and snippets.

@chopfitzroy
Forked from facholi/index.js
Created March 5, 2017 08:22
Show Gist options
  • Save chopfitzroy/2f1a3210ff4ac4395fe1b24f18381e92 to your computer and use it in GitHub Desktop.
Save chopfitzroy/2f1a3210ff4ac4395fe1b24f18381e92 to your computer and use it in GitHub Desktop.
Node.js script that runs on AWS Lambda. Converts the pages of a PDF to JPG images.
var async = require("async");
var AWS = require("aws-sdk");
var gm = require("gm").subClass({imageMagick: true});
var fs = require("fs");
var mktemp = require("mktemp");
// Target box for a rendered page: the handler scales each page by the single
// factor min(PAGE_WIDTH / width, PAGE_HEIGHT / height), so the aspect ratio
// is preserved and the page fits inside PAGE_WIDTH x PAGE_HEIGHT.
var PAGE_WIDTH = 1300;
var PAGE_HEIGHT = 1300;
var utils = {
    /**
     * Decodes an object key as it appears in an S3 event notification.
     *
     * S3 URL-encodes the key in the event payload and represents spaces as
     * '+', while a literal plus sign arrives as '%2B'. The '+' -> ' '
     * substitution therefore has to run BEFORE percent-decoding; doing it
     * afterwards would also turn genuine plus signs into spaces.
     *
     * @param {string} key - URL-encoded object key from the event record.
     * @returns {string} The decoded object key.
     * @throws {URIError} If the key contains a malformed percent-escape.
     */
    decodeKey: function(key) {
        return decodeURIComponent(key.replace(/\+/g, ' '));
    }
};
// S3 client constructed once at module load with no explicit options; the
// handler uses it both to download the source PDF and to upload page images.
var s3 = new AWS.S3();
exports.handler = function(event, context, callback) {
//console.log('Received event:', JSON.stringify(event, null, 2));
var bucket = event.Records[0].s3.bucket.name,
srcKey = utils.decodeKey(event.Records[0].s3.object.key),
dstPrefix = srcKey.replace(/\.\w+$/, "") + '/',
fileType = srcKey.slice(-3, srcKey.length);
if (!fileType || fileType != 'pdf') {
var msg = "Invalid filetype found for key: " + srcKey;
callback(msg);
return;
}
console.log('starting the convertion process...');
function upload(data, filename) {
console.time("upload");
s3.putObject({
Bucket: bucket,
Key: dstPrefix + filename,
Body: data,
ContentType: "image/jpeg",
ACL: 'public-read',
Metadata: {
thumbnail: 'TRUE'
}
}, function(err, data) {
console.timeEnd("upload");
if (err) {
console.error(err);
return;
}
console.log('file ' + filename + ' was uploaded.');
});
}
function uploadAllPages() {
console.time("readdir");
fs.readdir('/tmp', function(err, files) {
console.timeEnd("readdir");
console.log(files.length + ' was generated: ' + files);
async.forEachOf(files, function(value, key, callback) {
console.time("readFile");
fs.readFile('/tmp/' + value, function(err, data) {
console.timeEnd("readFile");
if (err) {
console.error(err);
return;
}
upload(data, value);
});
}, function(err) {
if (err) {
callback(err)
} else {
console.log('process completed');
context.done();
}
});
});
}
async.waterfall([
function download(next) {
console.time("download");
//Download the image from S3
s3.getObject({
Bucket: bucket,
Key: srcKey
}, next);
},
function convertFile(response, next) {
console.timeEnd("download");
if (response.ContentType != 'application/pdf') {
var msg = "This file isn't a PDF."
console.error(msg);
callback(msg);
throw msg;
}
//var temp_file = mktemp.createFileSync("/tmp/XXXXXXXXXX.pdf")
//fs.writeFileSync(temp_file, response.Body);
//var gmFile = gm(temp_file); // all pages
//var gmFile = gm(temp_file + '[0-200]'); // first 200 pages
console.time("size");
gm(response.Body).size(function(err, size) {
console.timeEnd("size");
var scalingFactor = Math.min(PAGE_WIDTH / size.width, PAGE_HEIGHT / size.height),
width = scalingFactor * size.width,
height = scalingFactor * size.height;
console.log('Generating the images...');
console.time("resize");
this.density(144).borderColor('white').border(0, 0).setFormat("jpeg")
.resize(width, height).quality(100).adjoin().write("/tmp/page%06d", function(err) {
console.timeEnd("resize");
// if(temp_file) {
// fs.unlinkSync(temp_file);
// }
if (err) {
console.error(err);
} else {
console.log('the images has been extrated.')
}
next(err);
});
});
}
], function(err) {
if (err) {
console.error(
"Unable to generate the pages for '" + bucket + "/" + srcKey + "'" +
" due to error: " + err
);
} else {
console.log("Created pages for '" + bucket + "/" + srcKey + "'");
uploadAllPages();
}
});
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment