Skip to content

Instantly share code, notes, and snippets.

@himulawang
Created February 18, 2020 10:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save himulawang/33f8740eddfd44a626ec379175427b36 to your computer and use it in GitHub Desktop.
Save himulawang/33f8740eddfd44a626ec379175427b36 to your computer and use it in GitHub Desktop.
Headless Chrome Download Images
const puppeteer = require('puppeteer');
const fs = require("fs");
/**
 * Opens `url` in a fresh headless Chrome instance, collects the `src` of every
 * `.chimg_photo` element on the page and saves each image under `./img/`,
 * named after the last path segment of its URL.
 *
 * @param {string} url - Address of the page to crawl.
 * @param {number|string} pageID - Identifier used only in the log output.
 * @returns {Promise<void>} Settles after all image writes have finished and
 *   the browser has been closed.
 * @throws Rejects when launching the browser or a navigation fails. A failed
 *   file write is only logged, so the remaining images are still attempted.
 */
const crawlPage = async (url, pageID) => {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    // Disable the navigation timeout to prevent
    // "TimeoutError: Navigation timeout of 30000 ms exceeded".
    await page.setDefaultNavigationTimeout(0);
    await page.goto(url);
    // Fixed one-second pause after navigation. A plain timer is used instead
    // of page.waitFor(), which is deprecated in Puppeteer.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    const imageURLs = await page.evaluate(() => {
      const images = document.querySelectorAll(`.chimg_photo`);
      return [].map.call(images, (img) => img.src);
    });

    if (imageURLs.length > 0) {
      // Make sure the target directory exists, otherwise every write fails.
      await fs.promises.mkdir('./img', { recursive: true });
    }

    for (const imgURL of imageURLs) {
      const imgName = imgURL.split('/').pop();
      // Navigating to the image URL yields the raw response body.
      const response = await page.goto(imgURL);
      const data = await response.buffer();
      try {
        // Await the write so the function only settles once the file is on disk.
        await fs.promises.writeFile(`./img/${imgName}`, data);
        console.log(`PageID: ${pageID}, ${imgName} downloaded.`);
      } catch (err) {
        console.log(err);
      }
    }
  } finally {
    // Always release the Chrome process, even when a step above threw.
    await browser.close();
  }
};
/**
 * Crawls board posts sequentially, counting down from `from` (inclusive) to
 * `to` (exclusive).
 *
 * @param {number} from - First (highest) `b_seq` value to crawl.
 * @param {number} to - Lower bound; crawling stops before reaching it.
 * @returns {Promise<void>} Settles once every post in the range was crawled.
 */
const crawlRange = async (from, to) => {
  for (let seq = from; seq > to; seq--) {
    await crawlPage(`http://program.tving.com/tvn/cloy/5/Board/View?page=1&b_seq=${seq}`, seq);
  }
};

// The three ranges run concurrently; each one crawls its own posts in order.
// NOTE(review): posts 40..31 are not covered by any range - confirm whether
// that gap is intentional.
[
  [60, 50],
  [50, 40],
  [30, 20],
].forEach(([from, to]) => {
  // Handle failures per range so one failing range is reported instead of
  // surfacing as an unhandled promise rejection.
  crawlRange(from, to).catch((err) => {
    console.error(`Crawling posts ${from}..${to + 1} failed:`, err);
    process.exitCode = 1;
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment