Skip to content

Instantly share code, notes, and snippets.

@astur astur/icrawler-lis.js
Last active Jun 9, 2016

Embed
What would you like to do?
Example of using icrawler to scrape data from PuntoLIS.it
var icrawler = require('icrawler');
var fs = require('fs');
// Page requested once at start-up; only the cookies of its response are used.
var sCookie = 'http://www.puntolis.it/storelocator/defaultsearch.aspx?idcustomer=111'; // Only for cookies
// Start URL of the crawl: returns the list of provinces ("provincia").
var URL = 'http://www.puntolis.it/storelocator/buildMenuProv.ashx?CodSer=111'; // Start URL. List of 'provincia's
// Template for the list of localities ("localita") of one province; %s = province code.
var sLoc = 'http://www.puntolis.it/storelocator/buildMenuLoc.ashx?CodSer=111&ProvSel=%s'; // List of 'localita's for every provincia (PROV)
// Template for the result page of one locality; first %s = province, second %s = locality.
var sMarker = 'http://www.puntolis.it/storelocator/Result.aspx?provincia=%s&localita=%s&cap=XXXXX&Servizio=111'; // (PROV, LOC)

// Options object handed to icrawler below.
// NOTE(review): per the icrawler README `delay` is a pause between requests
// (presumably milliseconds) and `errorsFirst` controls where failed tasks are
// re-queued -- confirm against the installed icrawler version.
var opts = {
    errorsFirst: true,
    delay: 30000
};

/**
 * Initialisation hook: fetches `sCookie` and reports the cookies of that
 * response back through `cb`.
 *
 * @param {Object} needle - HTTP client supplied by the caller; only `needle.get` is used.
 * @param {*} log - Unused here.
 * @param {Function} cb - Called as `cb(err)` when the request fails, otherwise
 *     as `cb(null, res.cookies, {})` (cookies of the response, empty headers object).
 */
opts.init = function(needle, log, cb){
    needle.get(sCookie, {}, function(err, res){
        if (err) return cb(err);
        cb(null, res.cookies, {});
    });
};

/**
 * Persists the crawler state as pretty-printed JSON in ./data.json.
 *
 * The snapshot is written to a temporary sibling file first and then renamed
 * over the real state file, so an interruption in the middle of the write
 * cannot leave a truncated ./data.json that would fail to parse on resume.
 *
 * @param {Array} tasks - Pending tasks to store under the `tasks` key.
 * @param {Array} results - Collected results to store under the `results` key.
 */
opts.save = function(tasks, results){
    var stateFile = './data.json';
    var tmpFile = stateFile + '.tmp';
    var snapshot = JSON.stringify({tasks: tasks, results: results}, null, 4);
    fs.writeFileSync(tmpFile, snapshot);
    // Rename replaces the previous state file in a single step.
    fs.renameSync(tmpFile, stateFile);
};
// Resume support: when a ./data.json state file is present, either continue
// from the tasks/results stored in it or stop if no task is left.
if (fs.existsSync('./data.json')) {
    var data = JSON.parse(fs.readFileSync('./data.json', 'utf8'));
    if (data.tasks.length !== 0) {
        // Unfinished crawl: hand the stored queue and results to the crawler.
        opts.tasks = data.tasks;
        opts.results = data.results;
    } else {
        // Empty task list: there is nothing left to crawl.
        console.log('All tasks done');
        process.exit(0);
    }
}
/**
 * Substitutes the '%s' placeholders of `template`, left to right, with the
 * entries of `values`.
 *
 * A replacer function is used instead of a replacement string so that '$'
 * sequences inside a value (for example "$&") are inserted literally rather
 * than being interpreted as replacement patterns.
 *
 * @param {string} template - Text containing one '%s' per value.
 * @param {Array} values - Values to insert, in order.
 * @returns {string} The filled-in text.
 */
function fillTemplate(template, values) {
    var filled = template;
    values.forEach(function(value) {
        filled = filled.replace('%s', function() { return value; });
    });
    return filled;
}

/**
 * Reads attribute `name` from a wrapped element and trims it.
 *
 * @param {Object} $el - Wrapped element exposing `attr(name)`.
 * @param {string} name - Attribute name.
 * @returns {string} The trimmed attribute value, or '' when the attribute is
 *     absent (calling `.trim()` on the missing value would throw).
 */
function attrText($el, name) {
    var value = $el.attr(name);
    if (value === undefined || value === null) return '';
    return String(value).trim();
}

/*
 * Crawl entry point. The first callback handles every fetched page and
 * distinguishes three page kinds by their content:
 *   1. the province list (#TendinaProv)  -> queue one sLoc URL per province;
 *   2. a locality list (onLocSelect)     -> queue one sMarker URL per locality;
 *   3. anything else                     -> save one record per <marker> element.
 * The second callback receives the collected results and writes them to
 * ADDR.xlsx through the `excelize` module.
 *
 * NOTE(review): values are inserted into the URLs verbatim (no URL-encoding),
 * exactly as before -- confirm the site's codes/ids never need escaping.
 */
icrawler(URL, opts, function(url, $, _, res){
    if ($('#TendinaProv').length > 0) {
        // The first <option> is skipped (presumably a placeholder entry).
        $('#TendinaProv option').slice(1).each(function() {
            var provCode = $(this).attr('value');
            // An option without a value would only yield a junk "...=undefined" URL.
            if (provCode === undefined) return;
            _.push(fillTemplate(sLoc, [provCode]));
        });
    } else if ($('select[onchange="onLocSelect()"]').length > 0) {
        // The province code is taken from the last two characters of the page
        // URL (sLoc ends with "ProvSel=%s").
        var prov = url.slice(-2);
        $('option').slice(1).each(function() {
            var locId = $(this).attr('id');
            if (locId === undefined) return;
            _.push(fillTemplate(sMarker, [prov, locId]));
        });
        _.log('===' + prov + '===');
    } else {
        $('marker').each(function() {
            var $marker = $(this);
            _.save({
                Title: attrText($marker, 'insegna'),
                Address: attrText($marker, 'indirizzo'),
                Place: [
                    attrText($marker, 'cap'),
                    attrText($marker, 'localita'),
                    attrText($marker, 'provincia')
                ].join(' ')
            });
            _.step();
        });
    }
}, function(result){
    require('excelize')(result, './', 'ADDR.xlsx', 'sheet', function(err){
        if (err) throw err;
        console.log(result.length + ' adresses saved.');
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.