Skip to content

Instantly share code, notes, and snippets.

@tomykaira
Last active November 15, 2015 12:56
Show Gist options
  • Save tomykaira/602df5512d744c112587 to your computer and use it in GitHub Desktop.
Save tomykaira/602df5512d744c112587 to your computer and use it in GitHub Desktop.
PDF-ize DPZ articles with PhantomJS
// PhantomJS modules used throughout the script.
var execFile = require("child_process").execFile;
var page = require('webpage').create();
var system = require('system');

// Temporary directory that receives the per-page PDFs; assigned once
// `mktemp -d` has reported its path, and removed again by safeExit.
var workdir = null;

// URLs already rendered for the article currently being processed.
var knownUrls = [];

// Paths of the PDFs produced for the current article, in page order.
var files = [];
/**
 * Removes the temporary work directory (if one was created) and then
 * terminates PhantomJS with the given exit code.
 *
 * execFile is asynchronous, so the exit is requested from the completion
 * callback of `rm`; asking PhantomJS to exit right after spawning `rm`
 * risks the interpreter going away before the directory is deleted.
 *
 * @param {number} exitCode Process exit status handed to phantom.exit.
 */
function safeExit(exitCode) {
    if (!workdir) {
        // Nothing to clean up: exit straight away.
        phantom.exit(exitCode);
        return;
    }

    var exited = false;

    // Exits exactly once, whichever of the callback or the timer fires first.
    function finish() {
        if (exited) {
            return;
        }
        exited = true;
        clearTimeout(fallback);
        phantom.exit(exitCode);
    }

    // Safety net so the script cannot hang forever if `rm` never reports back.
    var fallback = setTimeout(finish, 5000);

    execFile("rm", ["-rf", workdir], null, finish);
}
/**
 * Renders the page currently loaded in `page` to a PDF inside the work
 * directory, then tries to navigate to the next page of the article.
 *
 * If the current URL has already been rendered for this article, or the
 * page offers no further link, the collected PDFs are handed to composePdf.
 *
 * @param {number} nth Index of the article in system.args; forwarded to
 *     composePdf.
 * @param {number} mth Sequence number of this page; used as the PDF file name.
 */
function processPage(nth, mth) {
    // Runs inside the page (must not reference outer variables): walks from
    // #mainContents up to BODY and removes all siblings at every level.
    function isolateMainContents() {
        var target = document.querySelector('#mainContents');
        if (!target) {
            return null;
        }
        var node = target;
        while (true) {
            var parent = node.parentElement;
            while (node.previousSibling) {
                parent.removeChild(node.previousSibling);
            }
            while (node.nextSibling) {
                parent.removeChild(node.nextSibling);
            }
            if (node.tagName === 'BODY') {
                break;
            }
            node = parent;
        }
    }

    // Runs inside the page: bounding box of #mainContents, or null if absent.
    function measureMainContents() {
        var target = document.querySelector('#mainContents');
        return target ? target.getBoundingClientRect() : null;
    }

    // Runs inside the page: follows nextURL or rightURL when the page
    // defines one of them; reports whether a navigation was started.
    function followNextLink() {
        if (typeof nextURL !== "undefined") {
            location.href = nextURL;
            return true;
        }
        if (typeof rightURL !== "undefined") {
            location.href = rightURL;
            return true;
        }
        return false;
    }

    page.viewportSize = { width: 1920, height: 1080 };
    console.log(page.url);

    // Seeing a URL twice means the article has looped: finish it.
    if (knownUrls.indexOf(page.url) !== -1) {
        return composePdf(nth);
    }
    knownUrls.push(page.url);

    // The work is deferred by 200 ms after the load event.
    window.setTimeout(function () {
        page.evaluate(isolateMainContents);

        var bounds = page.evaluate(measureMainContents);
        if (bounds == null) {
            console.log('No main part. Page structure may have been changed.');
            return safeExit(1);
        }

        // Shrink the viewport and clip region to the main content only.
        page.viewportSize = { width: bounds.width, height: bounds.height };
        page.clipRect = bounds;

        var pdfPath = workdir + '/' + mth + '.pdf';
        files.push(pdfPath);
        page.render(pdfPath);

        var navigated = page.evaluate(followNextLink);
        if (!navigated) {
            composePdf(nth);
        }
    }, 200);
}
/**
 * Starts processing the article whose URL is the nth command-line argument.
 * When nth is past the last argument, the script finishes successfully.
 *
 * @param {number} nth Index into system.args of the article URL.
 */
function processArticle(nth) {
    var args = system.args;
    if (nth >= args.length) {
        console.log("All done");
        return safeExit(0);
    }

    var articleUrl = args[nth];
    console.log('Processing', articleUrl);

    // Per-article state starts fresh.
    files = [];
    knownUrls = [];
    var pageCounter = 0;

    // Fires for the initial load and for every in-page navigation after it.
    page.onLoadFinished = function (status) {
        if (status === 'success') {
            processPage(nth, pageCounter);
            pageCounter += 1;
            return;
        }
        console.log('Unable to load the address!');
        return safeExit(1);
    };

    page.open(articleUrl);
}
/**
 * Merges the per-page PDFs collected in `files` into one document named
 * after the page title, then moves on to the next article.
 *
 * The output name is appended to `files` because pdfunite takes its
 * destination as the last argument.
 *
 * @param {number} nth Index of the article that has just been rendered;
 *     processing continues with nth + 1 on success.
 */
function composePdf(nth) {
    // Strip every space and every character that is illegal in file names
    // (the global flag matters: without it only the first match is removed).
    var outputName = page.title.replace(/[ \/\\:*?"<>|]/g, '') + '.pdf';
    files.push(outputName);

    execFile("pdfunite", files, null, function(err, stdout, stderr) {
        if (err) {
            console.log("convert error", err, stdout, stderr);
            // Stop here: do not start the next article after a failure.
            return safeExit(1);
        }
        processArticle(nth + 1);
    });
}
// Entry point: create a temporary work directory, then process the first
// article given on the command line (system.args[0] is the script itself).
execFile("mktemp", ["-d"], null, function(err, stdout, stderr) {
    if (err) {
        console.log("mktemp error", err, stdout, stderr);
        // Stop here: without a work directory nothing can be rendered.
        return safeExit(1);
    }
    // mktemp prints the directory path followed by a line break.
    workdir = stdout.replace(/[\r\n]+$/, '');
    processArticle(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment