Skip to content

Instantly share code, notes, and snippets.

@cxa
Created December 1, 2022 04:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cxa/aa7845741b7d45ee32de9ca5711e0c83 to your computer and use it in GitHub Desktop.
Save cxa/aa7845741b7d45ee32de9ca5711e0c83 to your computer and use it in GitHub Desktop.
#!/usr/bin/env node
// npm i -g @mozilla/readability puppeteer
const puppeteer = require("puppeteer");
const fs = require("fs");
// Source text of Readability.js, read once at startup so it can later be
// inlined into the script that is evaluated inside the browser page.
const readabilityJsPath = require.resolve("@mozilla/readability/Readability.js");
const readabilityJsStr = fs.readFileSync(readabilityJsPath, "utf-8");
/**
 * Extracts the article from the current page with Readability.
 *
 * This function runs inside the browser page, not in Node: `dump`
 * interpolates its source text into the script it evaluates. The body may
 * therefore only use names that exist in the page (`Readability`, whose
 * source is injected next to it, and the page's `document`), and it must stay
 * a named function declaration so the injected script can call `executor()`.
 *
 * The document is passed as the first constructor argument, which is the
 * documented `new Readability(document, options)` order; the previous
 * `({}, document)` form relied on the library's legacy-signature fallback.
 *
 * @returns {object|null} Whatever `Readability#parse()` returns for the page.
 */
function executor() {
  return new Readability(document).parse();
}
/**
 * Escapes the characters that are significant in HTML text content so a
 * plain-text value can be placed safely inside an element such as <title>.
 *
 * @param {string} text - Plain text to escape.
 * @returns {string} The text with `&`, `<` and `>` replaced by entities.
 */
const escapeHtmlText = (text) =>
  text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

/**
 * Opens `url` in a Puppeteer-launched browser, runs Readability inside the
 * page and wraps the extracted article in a minimal standalone HTML document.
 *
 * @param {string} url - Address of the page to extract.
 * @returns {Promise<string>} HTML document with the article title and content.
 * @throws {Error} When Readability yields no article for the page. Errors
 *   raised while navigating or evaluating the script propagate unchanged.
 */
const dump = async (url) => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);
    // The script is passed as a string, so the Readability source and the
    // source text of `executor` are inlined and run in the page's context.
    const resultArticle = await page.evaluate(`
(function(){
${readabilityJsStr}
${executor}
return executor();
}())
`);
    // Without this guard a page with no extractable article would surface as
    // an opaque "cannot read properties of null" TypeError below.
    if (!resultArticle) {
      throw new Error(`Readability could not extract an article from ${url}`);
    }
    // The title is plain text, so it must be escaped before being embedded;
    // the content is already HTML and is inserted as-is.
    const title =
      resultArticle.title == null ? "" : escapeHtmlText(String(resultArticle.title));
    return `
<!doctype html>
<html>
<head>
<title>${title}</title>
</head>
<body>
${resultArticle.content}
</body>
</html>`;
  } finally {
    // Always shut the browser down, even when navigation or extraction fails,
    // and wait for it so no browser process is left behind.
    await browser.close();
  }
};
/**
 * Command-line entry point: reads the URL from the first CLI argument, prints
 * the extracted HTML document to stdout and exits with status 0, or reports
 * the failure on stderr and exits with status 1.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const url = process.argv[2];
  if (!url) {
    // Fail fast instead of launching a browser just to navigate to `undefined`.
    console.error("Usage: provide the URL of the page to extract as the first argument");
    process.exit(1);
    return;
  }
  try {
    const html = await dump(url);
    // Writes to stdout can be asynchronous, and process.exit() does not wait
    // for pending writes; exit only once the write callback has fired so
    // large documents are not truncated.
    await new Promise((resolve, reject) => {
      process.stdout.write(`${html}\n`, (err) => (err ? reject(err) : resolve()));
    });
    process.exit(0);
  } catch (e) {
    console.error(e);
    process.exit(1);
  }
};
// Script entry point: start the CLI. `main` handles its own errors and sets
// the process exit status, so the returned promise is not awaited here.
main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment