Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Example of using icrawler to scrape data from PuntoLIS.it
var icrawler = require('icrawler');
var fs = require('fs');
// Every endpoint used by this script lives under the same store-locator path.
var sBase = 'http://www.puntolis.it/storelocator/';
// Search page; requested only to obtain cookies.
var sCookie = sBase + 'defaultsearch.aspx?idcustomer=111';
// Start URL of the crawl: the list of provinces ('provincia').
var URL = sBase + 'buildMenuProv.ashx?CodSer=111';
// List of localities ('localita') for one province; %s = province (PROV).
var sLoc = sBase + 'buildMenuLoc.ashx?CodSer=111&ProvSel=%s';
// Result page for one province/locality pair; the two %s = (PROV, LOC), in that order.
var sMarker = sBase + 'Result.aspx?provincia=%s&localita=%s&cap=XXXXX&Servizio=111';
// Options object handed to icrawler. The exact meaning of `errorsFirst` and
// `delay` is defined by icrawler (presumably retry ordering and the pause
// between requests in milliseconds — confirm against its README).
var opts = {
    errorsFirst: true,
    delay: 30000
};

/**
 * Warm-up hook: requests the cookie page (sCookie) once and hands the cookies
 * of that response back to the caller.
 *
 * @param {Object} needle - HTTP client supplied by the caller; only `get` is used.
 * @param {Function} log - Logger supplied by the caller; unused here.
 * @param {Function} cb - Node-style callback. Called as `cb(err)` when the
 *     request fails, otherwise as `cb(null, res.cookies, {})`.
 */
opts.init = function(needle, log, cb){
    needle.get(sCookie, {}, function(err, res){
        if (err) return cb(err);
        cb(null, res.cookies, {});
    });
};

/**
 * Persists the pending tasks and collected results to ./data.json, the file
 * the script reads back on start-up to resume a previous run.
 *
 * The snapshot is written to a sibling temp file and then renamed over
 * ./data.json. Writing ./data.json in place truncates it first, so a crash or
 * kill in the middle of the write would leave a file that can no longer be
 * parsed on the next start; with the rename the previous snapshot stays
 * intact until the new one is complete.
 *
 * @param {Array} tasks - Tasks still to be processed.
 * @param {Array} results - Items collected so far.
 */
opts.save = function(tasks, results){
    var snapshot = JSON.stringify({tasks: tasks, results: results}, null, 4);
    fs.writeFileSync('./data.json.tmp', snapshot);
    fs.renameSync('./data.json.tmp', './data.json');
};
// Resume support: when a snapshot from an earlier run exists, either continue
// with its remaining tasks or, if none are left, report that and exit.
if (fs.existsSync('./data.json')) {
    var data = JSON.parse(fs.readFileSync('./data.json', 'utf8'));
    if (data.tasks.length > 0) {
        // Hand the unfinished work and the items gathered so far back to the crawler.
        opts.tasks = data.tasks;
        opts.results = data.results;
    } else {
        console.log('All tasks done');
        process.exit(0);
    }
}
/**
 * Returns the trimmed value of attribute `name` on a cheerio-style selection.
 *
 * `attr` yields undefined for an attribute that is not present; calling
 * `.trim()` on that would throw from inside the parse callback, so a missing
 * attribute is reported as '' instead.
 *
 * @param {Object} node - Selection exposing `attr(name)`.
 * @param {string} name - Attribute to read.
 * @returns {string} Trimmed attribute value, or '' when the attribute is absent.
 */
function readAttr(node, name) {
    var raw = node.attr(name);
    return typeof raw === 'string' ? raw.trim() : '';
}

// Run the crawl. The first callback is invoked per fetched page and decides,
// from the page's content, which of the three page types it is looking at.
// The second callback receives the collected results once the crawl is done.
icrawler(URL, opts, function(url, $, _, res){
    if ($('#TendinaProv').length > 0) {
        // Province list: queue one locality-list URL per option. The first
        // option is skipped (presumably a placeholder entry — confirm).
        $('#TendinaProv option').slice(1).each(function() {
            _.push(sLoc.replace('%s', $(this).attr('value')));
        });
    } else if ($('select[onchange="onLocSelect()"]').length > 0) {
        // Locality list: queue one result URL per option, again skipping the
        // first one. The province is taken from the last two characters of
        // the current URL (sLoc ends with the province value, so this assumes
        // two-character province codes).
        $('option').slice(1).each(function() {
            _.push(sMarker.replace('%s', url.slice(-2)).replace('%s', $(this).attr('id')));
        });
        _.log('===' + url.slice(-2) + '===');
    } else {
        // Anything else is treated as a result page: store one record per <marker>.
        $('marker').each(function() {
            var marker = $(this);
            _.save({
                Title: readAttr(marker, 'insegna'),
                Address: readAttr(marker, 'indirizzo'),
                Place: [
                    readAttr(marker, 'cap'),
                    readAttr(marker, 'localita'),
                    readAttr(marker, 'provincia')
                ].join(' ')
            });
            _.step();
        });
    }
}, function(result){
    // Export everything that was collected to ./ADDR.xlsx (sheet name 'sheet').
    require('excelize')(result, './', 'ADDR.xlsx', 'sheet', function(err){
        if (err) throw err;
        console.log(result.length + ' adresses saved.');
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment