Created
September 7, 2018 05:39
-
-
Save kimihito/0c0c68c17bcf6b4545166fc6ce778f03 to your computer and use it in GitHub Desktop.
mozilla/readability を使った本文抽出サンプル
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import puppeteer from 'puppeteer';
// NOTE(review): `require` alongside `import` presumably relies on a transpiler
// or bundler (e.g. Babel) — confirm the build setup before switching styles.
const Readability = require("readability");
import { JSDOM } from 'jsdom';

// Placeholder value: replace with the address of the page to extract.
// Named TARGET_URL (not URL) so the global `URL` class is not shadowed.
// The trailing semicolon matters: without it, the parenthesised function
// expression below would be parsed as a call on this string literal.
const TARGET_URL = 'url';

/**
 * Opens TARGET_URL in a Puppeteer-launched browser, reads the rendered
 * `document.body.innerHTML`, rebuilds it as a jsdom document, runs
 * Readability over that document and logs the parse result to stdout.
 *
 * The browser is closed in a `finally` block so it is shut down even when
 * navigation or parsing throws. Any failure is logged to stderr and turned
 * into a non-zero exit code instead of an unhandled promise rejection.
 */
(async () => {
  const browser = await puppeteer.launch({
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--lang=ja,en-US,en',
    ],
  });

  try {
    const page = await browser.newPage();
    await page.goto(TARGET_URL);

    // Runs inside the page context; only the serialised string comes back.
    const html = await page.evaluate(() => document.body.innerHTML);

    const dom = new JSDOM(html);
    const content = new Readability(dom.window.document).parse();
    console.log(content);
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment