Skip to content

Instantly share code, notes, and snippets.

@IgorDePaula
Forked from diogoduailibe/test.js
Created May 22, 2016 13:33
Show Gist options
  • Save IgorDePaula/8b3cff1586591418bea40f7bc07b416d to your computer and use it in GitHub Desktop.
Save IgorDePaula/8b3cff1586591418bea40f7bc07b416d to your computer and use it in GitHub Desktop.
// Downloads the blog front page, hands the HTML to parseSite() and prints the
// link and extracted text of every article it returns.
var cheerio = require('cheerio'),
    http = require('http'),
    // Upper bound on how many article pages parseSite() downloads.
    max_articles = 2,
    // Site origin, scheme included: parseSite() strips this exact prefix from
    // article links to obtain the request path, so its value must not change.
    host = 'http://painel.blogfolha.uol.com.br';

var options = {
        // http.request() resolves this field through DNS, so it has to be the
        // bare host name; passing the full URL makes the lookup fail.
        hostname: host.replace(/^https?:\/\//, ''),
        path: '/',
        method: 'GET',
        // No 'Accept-Encoding' header: the body is read as plain UTF-8 text
        // below and never decompressed, so the server must not compress it.
        headers: {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.8,pt;q=0.6',
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'Host': 'painel.blogfolha.uol.com.br',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
        }
    },
    // Accumulates the front-page HTML as response chunks arrive.
    data = '';

// Reports a failed front-page download instead of letting the unhandled
// 'error' event terminate the process with an uncaught exception.
function reportFailure(err) {
    console.error('Request to ' + host + ' failed: ' + err.message);
}

var request = http.request(options, function (ret) {
    ret.setEncoding('utf8');
    ret.on('data', function (d) {
        data += d;
    });
    ret.on('error', reportFailure);
    ret.on('end', function () {
        // Print the link and the extracted text of each article.
        parseSite(data, function (result) {
            // Index loop: for...in would also visit any enumerable property
            // added to the array or to Array.prototype.
            for (var i = 0; i < result.length; i++) {
                console.log(result[i]['link']);
                console.log(result[i]['text']);
            }
        });
    });
});
request.on('error', reportFailure);
request.end();
/**
 * Finds the article links on the blog front page and downloads the articles
 * one at a time, collecting the link and the `.entry-content` text of each.
 *
 * At most `max_articles` articles are fetched, and never more than the page
 * actually contains. An article whose link is missing or whose download
 * fails is logged (for failures) and skipped, so `callback` is always invoked.
 *
 * @param {string} data - HTML of the front page.
 * @param {function(Array)} callback - Called once, after the last download,
 *   with an array of `{link, text}` objects in the order they were fetched.
 */
function parseSite(data, callback) {
    var $ = cheerio.load(data),
        result = [];
    var articles = $('article > header > h2 > a').toArray();
    // http.request() needs the bare host name for its DNS lookup, while the
    // file-level `host` also carries the scheme.
    var hostname = host.replace(/^https?:\/\//, '');
    // Stop at whichever comes first: the configured cap or the last link.
    var limit = Math.min(max_articles, articles.length);

    // Fetches article number `next`, then moves on to the following one;
    // calls `done` once every article up to `limit` has been handled.
    function parseLink(next, done) {
        if (next >= limit) {
            done();
            return;
        }

        var link = $(articles[next]).attr('href');
        if (!link) {
            // Anchor without an href: nothing to download.
            parseLink(next + 1, done);
            return;
        }

        var body = '';
        var advanced = false;

        // Moves to the next article exactly once, even if both an 'error'
        // and an 'end' event are delivered for this request.
        function advance() {
            if (advanced) {
                return;
            }
            advanced = true;
            parseLink(next + 1, done);
        }

        // Logs a failed download and carries on with the remaining articles.
        function fail(err) {
            console.error('Request to ' + link + ' failed: ' + err.message);
            advance();
        }

        // Absolute links on this site become a path; anything else (e.g. an
        // already relative link) is passed through unchanged.
        var path = link.indexOf(host) === 0 ? link.substring(host.length) : link;

        var options = {
            hostname: hostname,
            path: path || '/',
            method: 'GET',
            // No 'Accept-Encoding' header: the body is read as plain UTF-8
            // text and never decompressed, so it must arrive uncompressed.
            headers: {
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.8,pt;q=0.6',
                'Cache-Control': 'max-age=0',
                'Connection': 'keep-alive',
                'Host': 'painel.blogfolha.uol.com.br',
                'Upgrade-Insecure-Requests': '1',
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
            }
        };

        var request = http.request(options, function (ret) {
            ret.setEncoding('utf8');
            ret.on('data', function (d) {
                body += d;
            });
            ret.on('error', fail);
            ret.on('end', function () {
                if (advanced) {
                    // A failure was already reported for this article.
                    return;
                }
                var $1 = cheerio.load(body);
                var text = $1('.entry-content').text();
                result.push({'link': link, 'text': text});
                advance();
            });
        });
        request.on('error', fail);
        request.end();
    }

    parseLink(0, function () {
        callback(result);
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment