Skip to content

Instantly share code, notes, and snippets.

@lpar
Created Mar 18, 2019
Embed
What would you like to do?
scurl.js: fetch web pages, process them with Mozilla's Readability library, and write the results as HTML files to squirrel away.
const request = require("request");
const Readability = require("readability");
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const fs = require("fs");
/**
 * Escapes the characters that are significant in HTML text and in quoted
 * attribute values, so untrusted strings cannot break out of the markup.
 *
 * @param {*} text - Value to escape; coerced to a string.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Derives a file name (with ".html" extension) from an article title.
 *
 * Path separators, characters that are reserved on common file systems and
 * control characters are replaced everywhere in the title (not only at its
 * end), so the result can never point outside the current directory.
 *
 * @param {*} title - Article title; coerced to a string.
 * @returns {string} A file name ending in ".html"; "untitled.html" when
 *   nothing usable is left of the title.
 */
function toSafeFilename(title) {
  const cleaned = String(title)
    .replace(/[\\\/:*?"<>|\x00-\x1f]+/g, " ")
    .replace(/\s+/g, " ")
    // Leading dots would produce hidden files or "." / ".." style names.
    .replace(/^[.\s]+|\s+$/g, "");
  return (cleaned || "untitled") + ".html";
}

/**
 * Builds the response handler for one URL fetched with `request`.
 *
 * The returned callback parses the fetched page with JSDOM, extracts the
 * article with Readability and writes it as a standalone HTML file named
 * after the article title into the current directory.
 *
 * @param {string} url - The URL being fetched; used as the document base
 *   and for the link back to the original page.
 * @returns {function(*, Object, string): void} A `request`-style callback
 *   taking `(error, response, body)`.
 */
function makeCallback(url) {
  return function callback(error, response, body) {
    if (error) {
      console.error("failed to fetch " + url + ": " + (error.message || error));
      return;
    }
    if (!response || response.statusCode !== 200) {
      const status = response ? response.statusCode : "unknown";
      console.error("skipping " + url + ": HTTP status " + status);
      return;
    }

    const doc = new JSDOM(body, { url: url });
    const article = new Readability(doc.window.document).parse();
    // parse() yields null when no article could be extracted from the page.
    if (!article) {
      console.error("skipping " + url + ": no readable content found");
      return;
    }

    const title = article.title || "untitled";
    const safeTitle = escapeHtml(title);
    const safeUrl = escapeHtml(url);
    // article.content is HTML produced by Readability and is embedded as-is.
    const newfile =
      "<!DOCTYPE html><html><head><meta charset=utf-8 />\n" +
      `<title>${safeTitle}</title>\n` +
      `<base href='${safeUrl}'></head><body>` +
      `<p><a href='${safeUrl}'>${safeUrl}</a></p>\n` +
      `<h1>${safeTitle}</h1>\n` +
      (article.content || "") + "</body></html>";

    fs.writeFile(toSafeFilename(title), newfile, function (err) {
      if (err) {
        console.error(err);
        return;
      }
      // Report success only once the file has actually been written.
      console.log("wrote " + title);
    });
  };
}
/**
 * Command-line entry point: fetches every URL argument and hands each
 * response to the handler built by `makeCallback`; prints usage help when
 * no URL was given.
 *
 * @param {string[]} argv - Full argument vector in `process.argv` layout
 *   (index 0 is the node binary, index 1 the script path).
 * @returns {void}
 */
function main(argv) {
  // Skip the node binary and the script path; only real arguments can be URLs.
  const urls = argv.slice(2).filter((arg) => /^https?:\/\//i.test(arg));

  if (urls.length === 0) {
    console.log("scurl.js <url> [<url> ...]");
    console.log("Writes readability-processed web pages to current directory.");
    return;
  }

  for (const url of urls) {
    console.log(" get " + url);
    request({ url: url }, makeCallback(url));
  }
}

main(process.argv);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment