Example of using icrawler to scrape data from PuntoLIS.it
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Crawler driver (third-party) and Node's file-system module (used to persist progress).
var icrawler = require('icrawler');
var fs = require('fs');

// Requested once before the crawl starts; only the cookies of its response are used.
var sCookie = 'http://www.puntolis.it/storelocator/defaultsearch.aspx?idcustomer=111';
// Start URL: the list of provinces ("provincia").
var URL = 'http://www.puntolis.it/storelocator/buildMenuProv.ashx?CodSer=111';
// Template for the list of localities ("localita") of one province; %s = province (PROV).
var sLoc = 'http://www.puntolis.it/storelocator/buildMenuLoc.ashx?CodSer=111&ProvSel=%s';
// Template for the results page; first %s = province (PROV), second %s = locality (LOC).
var sMarker = 'http://www.puntolis.it/storelocator/Result.aspx?provincia=%s&localita=%s&cap=XXXXX&Servizio=111';

// Options object handed to icrawler; init/save/tasks/results are attached further down.
// NOTE(review): errorsFirst and delay are interpreted by icrawler itself - delay is
// presumably the pause between requests in milliseconds; confirm against the icrawler docs.
var opts = {
    errorsFirst: true,
    delay: 30000
};
/**
 * Warm-up hook passed to icrawler as opts.init: requests the store-locator search page
 * (sCookie) and forwards the cookies of that response to the callback.
 *
 * @param {Object} needle - HTTP client supplied by the caller; only needle.get is used.
 * @param {Function} log - Logger supplied by the caller; not used here.
 * @param {Function} cb - Node-style callback. Called as cb(err) when the request fails,
 *     otherwise as cb(null, res.cookies, {}).
 *     NOTE(review): the meaning of the trailing {} is defined by icrawler - confirm there.
 */
opts.init = function(needle, log, cb){
    needle.get(sCookie, {}, function(err, res){
        if (err) {
            return cb(err);
        }
        cb(null, res.cookies, {});
    });
};
/**
 * Persistence hook passed to icrawler as opts.save: stores the current task list and the
 * collected results in ./data.json so the script can pick them up again on the next start.
 *
 * @param {*} tasks - Stored under the "tasks" key.
 * @param {*} results - Stored under the "results" key.
 */
opts.save = function(tasks, results){
    var snapshot = JSON.stringify({tasks: tasks, results: results}, null, 4);
    // Write to a sibling temp file first and rename it over the target, so that an
    // interruption in the middle of the write cannot leave a truncated ./data.json
    // (which would make JSON.parse fail on the next start).
    var tmpPath = './data.json.tmp';
    fs.writeFileSync(tmpPath, snapshot);
    fs.renameSync(tmpPath, './data.json');
};
// Resume support: if a snapshot written by opts.save exists, continue from it.
if (fs.existsSync('./data.json')) {
    var data = JSON.parse(fs.readFileSync('./data.json', 'utf8'));
    // Fail with an actionable message instead of an opaque TypeError on `.length`
    // when the file is valid JSON but not a snapshot.
    if (!data || !Array.isArray(data.tasks)) {
        throw new Error('./data.json does not contain a "tasks" array; delete the file to restart the crawl');
    }
    if (data.tasks.length === 0) {
        // Nothing left to crawl: report and stop before icrawler is started.
        console.log('All tasks done');
        process.exit(0);
    }
    opts.tasks = data.tasks;
    opts.results = data.results;
}
/**
 * Start the crawl at URL.
 *
 * The first callback is invoked per fetched page with:
 *   url - address of the page,
 *   $   - jQuery-like selector function over the page,
 *   _   - crawler helper; push(url), save(item), step() and log(msg) are used here,
 *   res - response object (unused).
 * It distinguishes three page types:
 *   1. province list (contains #TendinaProv): queue one locality-list URL per option;
 *   2. locality list (contains the onLocSelect select): queue one results URL per option;
 *   3. anything else: treat every <marker> element as one shop and save it.
 *
 * The second callback receives the collected results and writes them to ./ADDR.xlsx
 * through the 'excelize' module.
 */
icrawler(URL, opts, function(url, $, _, res){
    // Read an attribute as trimmed text. A missing attribute yields '' rather than
    // throwing, so one incomplete <marker> cannot abort the whole crawl.
    function attrText(el, name) {
        var value = el.attr(name);
        return typeof value === 'string' ? value.trim() : '';
    }

    if ($('#TendinaProv').length > 0) {
        // slice(1) skips the first <option> (presumably a placeholder entry - confirm).
        $('#TendinaProv option').slice(1).each(function() {
            _.push(sLoc.replace('%s', $(this).attr('value')));
        });
    } else if ($('select[onchange="onLocSelect()"]').length > 0) {
        // The province is taken from the last two characters of the URL, i.e. the value
        // substituted for %s at the end of sLoc.
        var prov = url.slice(-2);
        $('option').slice(1).each(function() {
            _.push(sMarker.replace('%s', prov).replace('%s', $(this).attr('id')));
        });
        _.log('===' + prov + '===');
    } else {
        $('marker').each(function() {
            var marker = $(this);
            _.save({
                Title: attrText(marker, 'insegna'),
                Address: attrText(marker, 'indirizzo'),
                Place: [
                    attrText(marker, 'cap'),
                    attrText(marker, 'localita'),
                    attrText(marker, 'provincia')
                ].join(' ')
            });
            _.step();
        });
    }
}, function(result){
    require('excelize')(result, './', 'ADDR.xlsx', 'sheet', function(err){
        if (err) throw err;
        console.log(result.length + ' adresses saved.');
    });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment