// Example of using icrawler to scrape data from Ferra.ru
var icrawler = require('icrawler');
var fs = require('fs');

// Entry URL handed to icrawler as the first page to fetch.
var URL = 'http://www.ferra.ru/ru/techlife/news/';

// Options passed to icrawler unchanged; see the icrawler README for the
// exact meaning of each flag.
var opts = {
    errorsFirst: true,
    concurrency: 10,
    saveOnFinish: false,
    saveOnCount: 500,
    asyncParse: true,
    // NOTE(review): this is the same path the finish callback below writes
    // the results to -- confirm the two writers are not meant to use
    // different files.
    file: './data.json'
};

/**
 * Crawl starting at URL. The first callback runs for each fetched page,
 * the second runs once with the collected results.
 *
 * Parse callback arguments as used here:
 *   url - address of the current page (stored as `href` in saved records)
 *   $   - jQuery-like selector function bound to the page's DOM
 *   _   - icrawler helper object; this script calls save/step/push/cb on it
 *   res - not used by this script
 */
icrawler(URL, opts, function(url, $, _, res) {
    // Pages without the navigation block are reported through cb(true)
    // and not parsed further (presumably treated as failed by icrawler --
    // TODO confirm against the icrawler docs).
    if ($('div.b-option-nav').length < 1) {
        return _.cb(true);
    }

    // Text of the third child node of .b_infopost, trimmed and with its
    // last character dropped, is compared against a fixed author name.
    var author = $('.b_infopost').contents().eq(2).text().trim().slice(0, -1);
    if (author === 'Алексей Козлов') {
        _.save({
            title: $('h1').text(),
            date: $('.b_infopost>.date').text(),
            href: url,
            size: $('.newsbody').text().length
        });
        _.step();
    }

    // Queue the link of the current element, skipping anchors that have no
    // href so that undefined is never pushed to the crawler.
    function enqueueHref() {
        var href = $(this).attr('href');
        if (href) {
            _.push(href);
        }
    }

    // Links found inside paragraphs of .b_rewiev blocks.
    $('.b_rewiev p>a').each(enqueueHref);
    // Only the first "next" pager link.
    $('.bpr_next>a').slice(0, 1).each(enqueueHref);

    // asyncParse is enabled in opts, so completion is signalled explicitly.
    _.cb();
}, function(result) {
    // Write everything passed to the finish callback as pretty-printed JSON.
    fs.writeFileSync('./data.json', JSON.stringify(result, null, 4));
    console.log('Results saved');
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment