Last active
February 15, 2019 02:17
-
-
Save ckhung/b3938a092d8dd0641c1d2b73ef809cf9 to your computer and use it in GitHub Desktop.
web scraper capable of downloading javascript-generated content using puppeteer.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node
// http://toddhayton.com/2018/08/01/scraping-with-puppeteer/
// usage example:
// ./pptscraper.js https://www.cwb.gov.tw/V7/observe/real/46744.htm > a.htm
const homedir = require('os').homedir(); | |
const puppeteer = require(homedir + '/node_modules/puppeteer'); | |
const url = process.argv[2]; | |
const sel = process.argv[3]; | |
/**
 * Open `url` in a Puppeteer-launched browser and print the page's HTML to stdout.
 *
 * Waits for the CSS selector `sel` (or `body` when no selector was given) to
 * be present before serializing the page, so the output is taken after that
 * element exists rather than right after navigation.
 *
 * @returns {Promise<void>} Resolves after the markup has been printed and the
 *   browser has been closed.
 * @throws {Error} If no URL was supplied; also rejects with whatever error
 *   Puppeteer raises during launch, navigation, or the selector wait.
 */
async function main() {
  // Fail fast with a readable message instead of launching a browser for nothing.
  if (!url) {
    throw new Error('usage: pptscraper.js <url> [selector]');
  }

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);
    await page.waitForSelector(sel || 'body');
    console.log(await page.content());
  } finally {
    // Close the browser on every path; otherwise a failed navigation or a
    // selector timeout would leave the launched browser running.
    await browser.close();
  }
}
// Script entry point. Handle rejection explicitly so a failed scrape is
// reported on stderr and reflected in the exit status instead of being left
// as an unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment