-
-
Save madhavpalshikar/96e72889c534443caefd89000b2e69b5 to your computer and use it in GitHub Desktop.
// Dependencies and configuration for the Lambda document-to-PDF converter.
// (The original paste carried " | |" markdown-table residue on every line,
// which made the file syntactically invalid; removed here.)
const https = require('https');
const path = require('path');
const fs = require('fs');
var async = require('async');
const { writeFileSync } = require('fs');
const lambdafs = require('lambdafs');
const { execSync } = require('child_process');
var AWS = require('aws-sdk');

// Brotli-compressed LibreOffice archive shipped via a Lambda layer into /opt.
const inputPath = path.join('/opt', 'lo.tar.br');
// /tmp is the only writable filesystem location inside Lambda.
const outputPath = '/tmp/';
// S3 bucket holding both the source documents and the converted PDFs.
const bucketName = 'doc-conversion-test';
/**
 * Lambda handler: converts an Office document stored in S3 to PDF using the
 * LibreOffice layer, then uploads the PDF back to the same bucket under pdf/.
 *
 * Expected trigger is an SQS message whose body is the S3 object key of the
 * source document. Steps:
 *   1. Inflate the Brotli-compressed LibreOffice layer from /opt into /tmp.
 *   2. Download the source document from S3 into /tmp.
 *   3. Run soffice headless to convert it to PDF (retrying once; see below).
 *   4. Upload the resulting PDF to `pdf/<basename>.pdf`.
 *
 * @param {object} event   SQS event; event.Records[0].body is the S3 key.
 * @param {object} context Lambda context (unused).
 */
module.exports.handler = async (event, context) => {
  console.log(execSync('ls -alh /opt').toString('utf8'));

  try {
    // Decompress the LibreOffice layer (lo.tar.br) into /tmp.
    const decompressed = { file: await lambdafs.inflate(inputPath) };
    console.log('output brotli de:----', decompressed);
  } catch (error) {
    // Non-fatal: a warm container may already have the layer inflated.
    console.log('Error brotli de:----', error);
  }

  try {
    console.log(execSync('ls -alh /opt').toString('utf8'));
  } catch (e) {
    console.log(e);
  }

  // Guard against non-SQS invocations (e.g. a bare console test event),
  // which previously crashed with "Cannot read properties of undefined".
  if (!event || !Array.isArray(event.Records) || event.Records.length === 0) {
    throw new Error('Expected an SQS event with Records[0].body set to the S3 object key');
  }
  const s3fileName = event.Records[0].body;
  console.log('s3 bucket file name from event:', s3fileName);

  const s3 = new AWS.S3({ apiVersion: '2006-03-01' });

  // Promisified wrapper around the callback-style S3 getObject API.
  const getObject = (keyFile) =>
    new Promise((resolve, reject) => {
      s3.getObject({ Bucket: bucketName, Key: keyFile }, (error, data) => {
        if (error) {
          reject(error);
        } else {
          resolve(data);
        }
      });
    });

  const fileData = await getObject(s3fileName);
  try {
    fs.writeFileSync(outputPath + s3fileName, fileData.Body);
  } catch (err) {
    console.error('file write:', err);
  }

  // Single-quote the input path (escaping any embedded quotes) so a key
  // containing spaces or shell metacharacters can neither break the command
  // nor inject into the shell — the key comes from an external SQS message.
  const quotedInput = `'/tmp/${s3fileName.replace(/'/g, "'\\''")}'`;
  const convertCommand = `export HOME=/tmp && /tmp/lo/instdir/program/soffice.bin --headless --norestore --invisible --nodefault --nofirststartwizard --nolockcheck --nologo --convert-to "pdf:writer_pdf_Export" --outdir /tmp ${quotedInput}`;
  try {
    console.log(execSync(convertCommand).toString('utf8'));
  } catch (e) {
    // soffice routinely exits non-zero on its very first run in a fresh
    // container; running the same command a second time is the established
    // workaround, so the catch deliberately retries instead of rethrowing.
    console.log(execSync(convertCommand).toString('utf8'));
  }

  console.log(execSync('ls -alh /tmp').toString('utf8'));

  // Promisified wrapper around S3 putObject; resolves with the key on success.
  const uploadFile = (buffer, fileName) =>
    new Promise((resolve, reject) => {
      s3.putObject({ Body: buffer, Key: fileName, Bucket: bucketName }, (error) => {
        if (error) {
          reject(error);
        } else {
          resolve(fileName);
        }
      });
    });

  // Output key: same base name as the source, with a .pdf extension.
  const pdfName = s3fileName.substr(0, s3fileName.lastIndexOf('.')) + '.pdf';
  const pdfData = fs.readFileSync(outputPath + pdfName);
  await uploadFile(pdfData, 'pdf/' + pdfName);
  console.log('new pdf converted and uploaded!!!');
};
yes, it works fine for 99%
I used this code to convert from docx to html -- worked perfectly with a few tweaks..
I also need it to convert to PDF -- but the PDF that gets generated is empty; not sure what's wrong.
import {execSync} from "child_process";

export class WordToHtmlConvertor {
  /**
   * Handles an S3 upload event: downloads the uploaded document, converts it
   * to PDF with the LibreOffice layer, and uploads the result back to S3.
   *
   * @param bucketName - Bucket holding both the source document and the output.
   * @param key - Object key of the uploaded document.
   */
  static convertWordToHtml = async (bucketName: string, key: string) => {
    const fs = require("fs");
    const path = require("path");
    const AWS = require("aws-sdk");
    const s3 = new AWS.S3();

    // Download the source document. Use .promise() so the await actually
    // waits: passing a callback to getObject returns an AWS.Request, which
    // is not a thenable, so `await s3.getObject(params, cb)` resolved
    // immediately and the file write raced with the conversion step below.
    const data = await s3.getObject({ Bucket: bucketName, Key: key }).promise();
    const localInput = '/tmp/' + key;
    fs.writeFileSync(localInput, data.Body);

    try {
      // Decompress the Brotli-packed LibreOffice layer shipped in /opt.
      const lambdafs = require('lambdafs');
      const inputPath = path.join('/opt', 'lo.tar.br');
      const decompressed = { file: await lambdafs.inflate(inputPath) };
      console.log('Libre Office Brotli layer decompressed', decompressed);
    } catch (error) {
      // Non-fatal on warm containers where the layer is already inflated.
      console.log('Error decompressing', error);
    }

    // Output name: same base name as the source, with a .pdf extension.
    const newFile = key.substr(0, key.lastIndexOf(".")) + ".pdf";
    console.log(newFile);

    // Convert the file that was actually uploaded. The original hard-coded
    // /tmp/TEst.docx here, so every invocation converted the same fixture
    // regardless of the event's key — the likely cause of "empty" output.
    // JSON.stringify quotes the path against spaces/metacharacters in keys.
    const convertCommand = `export HOME=/tmp && /tmp/lo/instdir/program/soffice.bin --headless --norestore --invisible --nodefault --nofirststartwizard --nolockcheck --nologo --convert-to "pdf:writer_pdf_Export" --outdir /tmp ${JSON.stringify(localInput)}`;
    try {
      console.log(execSync(convertCommand).toString('utf8'));
    } catch (e) {
      // soffice often exits non-zero on its first run in a fresh container;
      // retry the same command once before giving up.
      console.log(execSync(convertCommand).toString('utf8'));
    }
    // List /tmp for debugging instead of cat-ing binary docx/pdf into the log.
    console.log(execSync('ls -alh /tmp').toString('utf8'));

    // putObject().promise() is already a promise — no manual Promise wrapper.
    const uploadFile = (buffer: Buffer, fileName: string) =>
      s3.putObject({
        Bucket: bucketName,
        Key: fileName,
        Body: buffer,
        ContentType: 'application/pdf',
      }).promise().then(() => fileName);

    try {
      // Read the PDF as a Buffer: the original used {encoding: 'binary'},
      // which yields a latin1 string and can corrupt the uploaded bytes.
      const fileData = fs.readFileSync('/tmp/' + newFile);
      const result = await uploadFile(fileData, newFile);
      console.log("Uploaded to s3:", result);
    } catch (err) {
      console.error("Error uploading file to S3:", err.toString());
    }
    console.log('New Document converted and uploaded');
  };
}
can you please help ?
this worked with some editing:
- had to give s3 bucket read and write access for my lambda function in IAM
- "body = event.Records[0].body;" returns undefined -> I just manually changed it to my filename (ex. file.docx)
the pdf looks great, thank you!
working with a little modification for amplify
https://gist.github.com/diveshqss/1fc4a81a47db2e24f1401e50984dc075
I have been using this blog (https://madhavpalshikar.medium.com/converting-office-docs-to-pdf-with-aws-lambda-372c5ac918f1) for my conversion project. I have followed all the steps in this blog up to step 6 — uploading the code — but I don't know how to test it now. I'm not able to upload a doc and then check whether it converts. I'm a novice to AWS and I'm experimenting, so if you could help me with that, I would be beyond grateful. Please reach out to me at - priyanshikathuria@gmail.com
Thanks @madhavpalshikar for sharing this. You just saved my day.
Kudos!!
Hi
I have tried to follow the instructions on your post but every time I test the Lambda Function I get the below error from Lambda:
{
"errorType": "TypeError",
"errorMessage": "Cannot read properties of undefined (reading '0')",
"trace": [
"TypeError: Cannot read properties of undefined (reading '0')",
" at Runtime.module.exports.handler (/var/task/index/index.js:36:26)",
" at runMicrotasks ()",
" at processTicksAndRejections (node:internal/process/task_queues:96:5)"
]
}
I'm pretty sure the failure originates at line 36:
body = event.Records[0].body;
Does anyone have any ideas how to fix this issue please?
Thanks
M
I have tried a few different AWS Lambda OpenOffice PDF generators over the past weeks and this is the only one that has worked with current Amazon Linux 2/Lambda constraints for a wide variety of files.