Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Example of using icrawler to scrape data from Amazon Top Reviewers (drafts)
var icrawler = require('icrawler');
var fs = require('fs');
// Settings object passed as the second argument of the icrawler() call below.
var opts = { concurrency: 10 };

// Cookie values for the crawl; the ones here are placeholders.
opts.cookies = {
    'x-main': 'MnygWH5s8Ue0n9iKEDQqmL5XJfpwVc7y', //use your cookies here
    'session-id': '111-3333333-5555555', //use your cookies here
};

// Extra header identifying the requests as XMLHttpRequest calls.
opts.headers = {
    'X-Requested-With': 'XMLHttpRequest',
};
// Listing pages of the Top Reviewers ranking: page=1 through page=1000.
// Declared with `const` under its own name on purpose: a bare `URL = ...`
// assignment creates an implicit global that replaces Node's built-in URL
// constructor for the whole process (and throws in strict mode / ES modules).
const pageUrls = Array.from(
    { length: 1000 },
    (unused, index) => 'http://www.amazon.com/review/top-reviewers/?ie=UTF8&page=' + (index + 1)
);

// Two-stage crawl driven by a single parse callback:
//   1. listing pages  -> queue one follow-up task per reviewer row;
//   2. reviewer tasks -> save rank, name, email and profile link.
// Parameters of the parse callback, as used below:
//   task - the item being processed: a start URL, or an object queued via _.push
//   $    - the loaded response; queried with selectors for listing pages and read
//          as `$.data.email` for reviewer tasks
//          (NOTE(review): presumably the parsed JSON body there - confirm)
//   _    - crawler helper exposing push / save / step
//   res  - unused here
icrawler(pageUrls, opts, function(task, $, _, res){
    // Only tasks queued below carry a string `link`. Testing `task.name` instead
    // would send a reviewer with an empty name back into the listing branch.
    var isReviewerTask = Boolean(task) && typeof task.link === 'string';

    if (!isReviewerTask) {
        // slice(1) drops the first row of the grid.
        $('table.CMpaginateBar+table.crDataGrid>tr').slice(1).each(function(){
            var row = $(this);
            var id = row.find('td>a').eq(1).attr('name');
            if (!id) {
                // No reviewer id on this row: skip it rather than queueing
                // URLs that contain the literal text "undefined".
                return;
            }
            _.push({
                // Keep only the digits of the rank cell.
                rank: row.find('td').first().text().replace(/\D*(\d+)\D*/g, '$1'),
                name: row.find('b').text(),
                link: `http://www.amazon.com/gp/pdp/profile/${id}/`,
                url: `http://www.amazon.com/gp/profile/${id}/customer_email`,
            });
        });
        return;
    }

    _.save({
        rank: task.rank,
        name: task.name,
        email: $.data.email,
        url: task.link,
    });
    // NOTE(review): presumably advances icrawler's progress indicator - confirm.
    _.step();
}, function(results){
    // Final callback: write everything it receives to data.json as indented JSON.
    fs.writeFileSync('data.json', JSON.stringify(results, null, 4), 'utf8');
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.