Skip to content

Instantly share code, notes, and snippets.

@taisang1996
Created January 17, 2017 08:57
Show Gist options
  • Save taisang1996/4c9f9c74339e57fb58ec34a7efbadcfb to your computer and use it in GitHub Desktop.
Save taisang1996/4c9f9c74339e57fb58ec34a7efbadcfb to your computer and use it in GitHub Desktop.
Crawl thông tin sản phẩm của maihoang.com.vn
const got = require('got')
const cheerio = require('cheerio')
/**
 * Crawls one product-listing page and follows its pagination.
 *
 * Fetches `url`, calls `parseProductInfo` for every product link matched by
 * `.pro-item .pro-img a`, then recurses into the anchor that follows the
 * `.pagination span` element unless that anchor is labelled 'Trang cuối'.
 *
 * @param {string} url - Listing page URL; passed through `encodeURI` before fetching.
 * @returns {Promise} Settles once this page and every following page have been
 *   processed (and the product calls too, when `parseProductInfo` returns a
 *   promise). Errors are logged and never rejected; the resolved value is not
 *   meaningful.
 */
let parseProductFromLink = (url) => {
  const pageUrl = encodeURI(url)
  return got(pageUrl)
    .then(response => cheerio.load(response.body))
    .then($ => {
      // Product pages are started right away, without waiting for each other.
      // Anchors without an href are skipped instead of being fetched as "undefined".
      const pending = $('.pro-item .pro-img a')
        .toArray()
        .map(a => $(a).attr('href'))
        .filter(Boolean)
        .map(href => parseProductInfo(href))

      const nextLink = $('.pagination').find('span').next('a')
      const urlNext = nextLink.attr('href')
      // The href check matters: when there is no next anchor, text() is '' and
      // the label test alone would recurse into encodeURI(undefined).
      if (urlNext && nextLink.text() !== 'Trang cuối') {
        pending.push(parseProductFromLink(urlNext))
      }
      return Promise.all(pending)
    })
    .catch(error => console.log(error))
}
// Turns numeric character references (&#xE3; / &#227;) back into the characters
// they stand for; named entities and out-of-range values are left untouched.
const decodeNumericEntities = (html) =>
  html.replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, (entity, hex, dec) => {
    const codePoint = hex ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10)
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity
  })

// Pulls the single-word value that follows the 'Hãng sản xuất: ' label and is
// terminated by a <br> tag; returns null when the label cannot be found.
const extractManufacturer = (descriptionHtml) => {
  if (typeof descriptionHtml !== 'string') return null
  // NOTE(review): the original regex matched in the browser but not in Node,
  // presumably because cheerio's .html() can serialize non-ASCII characters as
  // numeric entities, so the raw Vietnamese label never appears in the string.
  // Decoding first (and normalizing to NFC) makes the label matchable again.
  const readable = decodeNumericEntities(descriptionHtml).normalize('NFC')
  const found = /Hãng sản xuất: (\w+)<br\s*\/?>/.exec(readable)
  return found ? found[1] : null
}

/**
 * Fetches one product page and scrapes its details.
 *
 * Logs the encoded URL and the extracted manufacturer, as before.
 *
 * @param {string} url - Product page URL; passed through `encodeURI` before fetching.
 * @returns {Promise<{ten: string, gia: string, hinhanh: string[], loaisanpham: string, hangsanxuat: (string|null)}|undefined>}
 *   Resolves to the scraped product: `ten` (name), `gia` (price, digits only),
 *   `hinhanh` (image `src` values), `loaisanpham` (text of the second-to-last
 *   `.thanhdieuhuong li` item) and `hangsanxuat` (manufacturer, or null when
 *   not found). Resolves to undefined when fetching or parsing fails; the
 *   error is logged, never rejected.
 */
let parseProductInfo = (url) => {
  const productUrl = encodeURI(url)
  console.log(productUrl)
  return got(productUrl)
    .then(response => cheerio.load(response.body))
    .then($ => {
      const ten = $('.product-home h1').first().text()
      const gia = $('.product-home .price').text().replace(/\D/g, '')
      const hinhanh = $('.product-home .tiny-img img').map((i, img) => $(img).attr('src')).get()
      const loaisanpham = $('.thanhdieuhuong li').last().prev().text()
      const hangsanxuat = extractManufacturer($('.thongtinsanpham .desc-info').html())
      console.log(hangsanxuat)
      return { ten, gia, hinhanh, loaisanpham, hangsanxuat }
    })
    .catch(error => console.log(error))
}
// Entry point: start crawling from the first listing page of this category.
const startUrl = 'http://maihoang.com.vn/main-bo-mach-chu'
parseProductFromLink(startUrl)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment