Skip to content

Instantly share code, notes, and snippets.

@decodedmrq
Forked from anhdiepmmk/dantri.com.js
Created June 14, 2021 16:05
Show Gist options
  • Save decodedmrq/0356cab14df6e8832d85310b1f9bdcae to your computer and use it in GitHub Desktop.
Save decodedmrq/0356cab14df6e8832d85310b1f9bdcae to your computer and use it in GitHub Desktop.
Sử dụng puppeteer và cheerio (JavaScript) để crawl các trang web tải nội dung bằng AJAX
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
// Crawl the dantri.com.vn home page: load it in a Puppeteer-driven browser,
// hand the resulting HTML to cheerio, and print the text and href of every
// <a> element.
(async () => {
  // Launched with a visible window and DevTools open.
  const browser = await puppeteer.launch({ headless: false, devtools: true });
  try {
    const page = await browser.newPage();
    await page.goto('https://dantri.com.vn/');

    // cheerio API reference: https://github.com/cheeriojs/cheerio
    const content = await page.content();
    const $ = cheerio.load(content);

    $('a').each((i, element) => {
      const a = $(element);
      const title = a.text();
      const url = a.attr('href');
      console.log(`title: ${title} url: ${url}`);
    });
  } finally {
    // Close the browser even when navigation or parsing fails; otherwise the
    // browser process stays alive and this script never exits.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the promise rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});
{
"name": "desktop",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"cheerio": "^1.0.0-rc.3",
"puppeteer": "^2.0.0"
}
}
const puppeteer = require('puppeteer');
// Render the kenh14.vn home page and save its first page as an A4 PDF
// named 'muong14.pdf' in the current working directory.
(async () => {
  const browser = await puppeteer.launch({});
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1440, height: 900, deviceScaleFactor: 2 });
    await page.goto('http://kenh14.vn/', { waitUntil: 'networkidle2' });
    await page.pdf({
      path: 'muong14.pdf',
      format: 'A4',
      printBackground: true,
      pageRanges: '1',
    });
  } finally {
    // Release the browser even if navigation or PDF generation throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the promise rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});
const puppeteer = require('puppeteer');
// Open nightmarejs.org, collect the text and href of every <a> element from
// inside the page, and print the resulting list.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('http://www.nightmarejs.org/');

    // The callback is passed to page.evaluate, so `document` here is the
    // loaded page's document; only the returned array comes back to this
    // script.
    const results = await page.evaluate(() => {
      console.log(document.documentElement.outerHTML);
      return Array.from(document.querySelectorAll('a'), (item) => ({
        title: item.innerText,
        url: item.getAttribute('href'),
      }));
    });

    console.log(results);
    // Do what you want with the `results`
  } finally {
    // Release the browser even if navigation or evaluation throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the promise rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});
const puppeteer = require('puppeteer');
// Open nightmarejs.org in a visible browser window and print the page's
// HTML to stdout.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('http://www.nightmarejs.org/');
    const html = await page.content();
    console.log(html);
  } finally {
    // Release the browser even if navigation fails.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the promise rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});
const puppeteer = require('puppeteer');
// Load https://example.com and save a screenshot of it as 'example.png'
// in the current working directory.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://example.com');
    await page.screenshot({ path: 'example.png' });
  } finally {
    // Release the browser even if navigation or the screenshot fails.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the promise rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment