Example of using icrawler to scrape data from Amazon Top Reviewers (drafts)
// Crawls the Amazon "Top Reviewers" listing pages, queues one follow-up
// request per reviewer, and writes the collected records to data.json.
const icrawler = require('icrawler');
const fs = require('fs');

const PAGE_COUNT = 1000;
const LISTING_URL = 'http://www.amazon.com/review/top-reviewers/?ie=UTF8&page=';

const opts = {
    concurrency: 10,
    cookies: {
        'x-main': 'MnygWH5s8Ue0n9iKEDQqmL5XJfpwVc7y', // use your cookies here
        'session-id': '111-3333333-5555555', // use your cookies here
    },
    headers: {
        'X-Requested-With': 'XMLHttpRequest',
    },
};

// Listing pages are numbered from 1. Kept in a local constant rather than an
// implicit global named `URL`, which would overwrite the built-in URL
// constructor (and throw in strict mode).
const startUrls = Array.from({ length: PAGE_COUNT }, (unused, i) => LISTING_URL + (i + 1));

icrawler(startUrls, opts, function (task, $, _, res) {
    if (!task.name) {
        // Initial tasks are plain URL strings (no `name`): a listing page.
        // slice(1) skips the first row of the grid (presumably the header row).
        $('table.CMpaginateBar+table.crDataGrid>tr').slice(1).each(function () {
            const row = $(this);
            const id = row.find('td>a').eq(1).attr('name');
            if (!id) {
                // No profile anchor in this row; skip it instead of queueing
                // a request for ".../profile/undefined/customer_email".
                return;
            }
            _.push({
                // Keep only the digits of the rank cell.
                rank: row.find('td').first().text().replace(/\D*(\d+)\D*/g, '$1'),
                name: row.find('b').text(),
                link: 'http://www.amazon.com/gp/pdp/profile/' + id + '/',
                url: 'http://www.amazon.com/gp/profile/' + id + '/customer_email',
            });
        });
    } else {
        // Tasks queued above carry `name`. Here `$` is read as an object with
        // a `data.email` field — assumes the customer_email endpoint returns
        // JSON that icrawler has already parsed; TODO confirm.
        _.save({
            rank: task.rank,
            name: task.name,
            email: $.data.email,
            url: task.link,
        });
        // NOTE(review): presumably advances icrawler's progress indicator — confirm.
        _.step();
    }
}, function (results) {
    // Called with everything passed to _.save(); persist as pretty-printed JSON.
    fs.writeFileSync('data.json', JSON.stringify(results, null, 4), 'utf8');
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment