Skip to content

Instantly share code, notes, and snippets.

@rodrigonehring
Created November 10, 2016 18:34
Show Gist options
  • Save rodrigonehring/30e4df6cc545c54d183ba6dfd1e03af5 to your computer and use it in GitHub Desktop.
Save rodrigonehring/30e4df6cc545c54d183ba6dfd1e03af5 to your computer and use it in GitHub Desktop.
Scraping Kabum, Terabyte and Pichau product info
const request = require('request');
const cheerio = require('cheerio');
/**
 * Converts a Brazilian-formatted price string (e.g. "R$ 1.234,56") to a number.
 *
 * When the text contains "R$", parsing starts right after its first occurrence;
 * otherwise the whole string is parsed. Every "." is treated as a thousands
 * separator and the first "," as the decimal separator.
 *
 * @param {*} str - Price text, or a value that has already been converted.
 * @returns {*} 0 for any falsy input, the input itself when it is not a string,
 *   otherwise the result of `parseFloat` (NaN when no leading number is found).
 */
function priceToNumber(str) {
  if (!str) {
    return 0;
  }
  if (typeof str !== 'string') {
    return str;
  }

  const currencyMarker = 'R$';
  const markerIndex = str.indexOf(currencyMarker);
  // Only skip past the marker when it exists: slicing from (-1 + 2) would
  // silently drop the first character of a string that has no "R$".
  const amountText = markerIndex === -1
    ? str
    : str.slice(markerIndex + currencyMarker.length);

  const normalized = amountText
    // Further markers (e.g. "R$ R$ 10,00") must not block parseFloat.
    .replace(/R\$/g, '')
    // Strip every thousands separator, not just the first one.
    .replace(/\./g, '')
    .replace(',', '.');

  return parseFloat(normalized);
}
/**
 * Downloads a product page and scrapes it with the parser that matches the URL.
 *
 * The parser is picked by substring match on the URL ("kabum", "pichau",
 * "terabyte"). For any other URL the promise still resolves, with
 * `error: true` set on the result.
 *
 * @param {string} url - Address of the product page.
 * @returns {Promise<Object>} Resolves with the scraped fields plus `url`, with
 *   `price` and `price2` passed through `priceToNumber`. Rejects when the
 *   request reports an error or when parsing the page throws.
 */
const getData = url => {
  return new Promise((resolve, reject) => {
    request(url, (error, response, html) => {
      if (error) {
        // Stop here: there is no HTML to parse once the request has failed.
        reject(error);
        return;
      }

      try {
        const $ = cheerio.load(html);
        let result = {};

        if (url.includes('kabum')) {
          result = kabum($, result);
        } else if (url.includes('pichau')) {
          result = pichau($, result);
        } else if (url.includes('terabyte')) {
          result = terabyte($, result);
        } else {
          result.error = true;
        }

        // These apply to every branch, including the unknown-store one.
        result.url = url;
        result.price = priceToNumber(result.price);
        result.price2 = priceToNumber(result.price2);
        resolve(result);
      } catch (parseError) {
        // A throw inside the request callback would otherwise bypass the
        // promise and surface as an uncaught exception.
        reject(parseError);
      }
    });
  });
};
/**
 * Fills `result` with the fields scraped from a Kabum product page.
 *
 * @param {Function} $ - Cheerio handle for the loaded page.
 * @param {Object} result - Object to populate; it is mutated and returned.
 * @returns {Object} The same `result`, with `title` (text of `h1.titulo_det`),
 *   `price` (text of `.preco_desconto strong`), `price2` (trimmed text of
 *   `.preco_normal`) and `images` (the `src` of each `#imagem-slide li img`).
 */
const kabum = ($, result) => {
  Object.assign(result, {
    title: $('h1.titulo_det').text(),
    price: $('.preco_desconto strong').text(),
    price2: $('.preco_normal').text().trim(),
    images: [],
  });

  $('#imagem-slide li img').each((index, element) => {
    result.images.push($(element).attr('src'));
  });

  return result;
};
/**
 * Fills `result` with the fields scraped from a Pichau product page.
 *
 * @param {Function} $ - Cheerio handle for the loaded page.
 * @param {Object} result - Object to populate; it is mutated and returned.
 * @returns {Object} The same `result`, with trimmed `title`
 *   (`.product-details h2`), `price` (first `.payment .boleto span`),
 *   `price2` (`.other .valorcartao`) and `images` (the `src` of each
 *   `.product-images ul.slides li img`).
 */
const pichau = ($, result) => {
  const trimmedText = selection => selection.text().trim();

  Object.assign(result, {
    title: trimmedText($('.product-details h2')),
    price: trimmedText($('.payment .boleto span').first()),
    price2: trimmedText($('.other .valorcartao')),
    images: [],
  });

  $('.product-images ul.slides li img').each((index, element) => {
    result.images.push($(element).attr('src'));
  });

  return result;
};
/**
 * Converts a price string with `priceToNumber` and multiplies it by `times`.
 *
 * @param {*} str - Price text handed to `priceToNumber`.
 * @param {number} times - Multiplier applied to the parsed value.
 * @returns {number} The parsed value multiplied by `times`.
 */
function multiplyParcels(str, times) {
  const parcelValue = priceToNumber(str);
  return parcelValue * times;
}
/**
 * Fills `result` with the fields scraped from a Terabyte product page.
 *
 * @param {Function} $ - Cheerio handle for the loaded page.
 * @param {Object} result - Object to populate; it is mutated and returned.
 * @returns {Object} The same `result`, with trimmed `title` (`h1.tit-prod`),
 *   `price` (first `p.val-prod`), `images` (the `src` of each `#Carousel img`)
 *   and `price2`: the trimmed `.val-parc span` text passed through
 *   `multiplyParcels` with 12 (presumably a 12-installment total; confirm
 *   against the page markup).
 */
const terabyte = ($, result) => {
  const title = $('h1.tit-prod').text().trim();
  const price = $('p.val-prod').first().text().trim();
  const parcelText = $('.val-parc span').text().trim();

  const images = [];
  $('#Carousel img').each((index, element) => {
    images.push($(element).attr('src'));
  });

  result.title = title;
  result.price = price;
  result.price2 = multiplyParcels(parcelText, 12);
  result.images = images;
  return result;
};
// Product pages to scrape: one Pichau, one Terabyte and one Kabum listing.
const urls = [
  'http://www.pichau.com.br/placa-de-video-zotac-geforce-gtx-1060-3gb-mini-zt-p10610a-10l-box',
  'http://www.terabyteshop.com.br/produto/6675/placa-de-video-zotac-geforce-gtx-1060-mini-6gb-zt-p10600a-10l-gddr5-pci-exp',
  'http://www.kabum.com.br/produto/80910/placa-de-video-vga-zotac-gtx-1060-3gb-192bits-8008mhz-dvi-hdmi-dp-zt-p10610a-10l/?tag=1060',
];

// Each page is scraped independently. Results are logged as they arrive, and a
// failure is reported instead of being left as an unhandled rejection.
urls.forEach(url => {
  getData(url)
    .then(console.log)
    .catch(error => console.error(`Failed to scrape ${url}:`, error));
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment