Skip to content

Instantly share code, notes, and snippets.

@astur astur/index.js
Last active Feb 20, 2018

Embed
What would you like to do?
LIS Map scraping (for Habr)
var log = require('cllc')();
var tress = require('tress');
var needle = require('needle');
var cheerio = require('cheerio');
var fs = require('fs');
// Page requested once at startup; the cookies from its response are reused
// for every later request (stored in httpOptions.cookies).
var sCookie = 'http://www.puntolis.it/storelocator/defaultsearch.aspx?idcustomer=111';
// First URL pushed into the queue; crawl() looks for '#TendinaProv option'
// elements in responses and turns each option value into an sLoc URL.
var sProv = 'http://www.puntolis.it/storelocator/buildMenuProv.ashx?CodSer=111';
// Template: '%s' is replaced with an option value taken from '#TendinaProv'.
var sLoc = 'http://www.puntolis.it/storelocator/buildMenuLoc.ashx?CodSer=111&ProvSel=%s';
// Template: the first '%s' is replaced with the last two characters of the URL
// being processed, the second with the 'id' attribute of a locality option.
var sMarker = 'http://www.puntolis.it/storelocator/Result.aspx?provincia=%s&localita=%s&cap=XXXXX&Servizio=111';
// Options object passed to every needle.get() call made by crawl().
var httpOptions = {};
// Accumulates one {Title, Address, Place} record per <marker> element found;
// serialized to ./data.json when the queue drains.
var results = [];
// Job queue: every URL pushed into it is handed to crawl().
var q = tress(crawl);

// A job finished successfully: run with concurrency 1.
q.success = function onSuccess() {
    q.concurrency = 1;
};

// A job was returned to the queue for retry: switch concurrency to -10000.
// NOTE(review): per the tress README a negative concurrency is a delay in
// milliseconds between jobs (here 10 s) - confirm against the installed version.
q.retry = function onRetry() {
    q.concurrency = -10000;
};

// Queue is empty: dump everything collected so far and close the progress log.
q.drain = function onDrain() {
    var serialized = JSON.stringify(results, null, 4);
    fs.writeFileSync('./data.json', serialized);
    log.finish();
    log('Работа закончена');
};
// Entry point: fetch the search page once to obtain session cookies, then
// seed the queue with the province list URL.
needle.get(sCookie, function(err, res){
    // Without cookies nothing else can proceed, so fail loudly. Always throw a
    // real Error (never a bare status number) so the crash has a message and
    // a stack trace.
    if (err) {
        throw err;
    }
    if (res.statusCode !== 200) {
        throw new Error('Unexpected HTTP status ' + res.statusCode + ' for ' + sCookie);
    }

    // Reuse the received cookies for every request made by crawl().
    httpOptions.cookies = res.cookies;

    log('Начало работы');
    // Three progress counters, advanced by log.step() calls inside crawl().
    log.start('Найдено провинций %s, Найдено коммун %s, Найдено маркеров %s.');
    q.push(sProv);
});
/**
 * Queue worker: downloads one URL and processes whatever the response contains.
 *
 * The same handler serves every kind of page. Each of the three selectors
 * below simply matches nothing on pages where it does not apply:
 *   - '#TendinaProv option'                      -> enqueue one sLoc URL per option
 *   - 'select[onchange="onLocSelect()"] option'  -> enqueue one sMarker URL per option
 *   - 'marker'                                   -> append a record to `results`
 *
 * @param {string} url - Address to fetch.
 * @param {Function} callback - Completion callback supplied by the queue.
 *     Called with `true` on a failed request and with no arguments on success.
 *     NOTE(review): tress documents a boolean argument as "return the job to
 *     the queue for retry" - confirm against the installed version.
 */
function crawl(url, callback){
    needle.get(url, httpOptions, function(err, res){
        var failed = err || res.statusCode !== 200;
        if (failed) {
            // Log the failure only while concurrency is still 1; once q.retry
            // has changed it, repeated failures are not logged again.
            if (q.concurrency === 1) {
                log.e((err || res.statusCode) + ' - ' + url);
            }
            return callback(true);
        }

        var $ = cheerio.load(res.body);
        // Last two characters of the current URL, used as the 'provincia' value.
        var urlTail = url.slice(-2);

        // The first <option> is skipped in both lists below.
        $('#TendinaProv option').slice(1).each(function(index, option) {
            var locUrl = sLoc.replace('%s', $(option).attr('value'));
            q.push(locUrl);
            log.step();
        });

        $('select[onchange="onLocSelect()"] option').slice(1).each(function(index, option) {
            var markerUrl = sMarker
                .replace('%s', urlTail)
                .replace('%s', $(option).attr('id'));
            q.push(markerUrl);
            log.step(0, 1);
        });

        $('marker').each(function(index, node) {
            var marker = $(node);
            var title = marker.attr('insegna').trim();
            var address = marker.attr('indirizzo').trim();
            var placeParts = [
                marker.attr('cap').trim(),
                marker.attr('localita').trim(),
                marker.attr('provincia').trim()
            ];
            results.push({
                Title: title,
                Address: address,
                Place: placeParts.join(' ')
            });
            log.step(0, 0, 1);
        });

        callback();
    });
}
{
"private": true,
"name": "lis-scraper",
"version": "0.0.1",
"description": "Web scraping example for habrahabr",
"main": "index.js",
"author": "astur <astur@yandex.ru> (http://kozlov.am/)",
"license": "WTFPL",
"dependencies": {
"cheerio": "^0.20.0",
"needle": "^1.0.0",
"tress": "^1.0.0",
"cllc": "^0.0.9"
}
}
@woodsleaf

This comment has been minimized.

Copy link

commented Mar 13, 2017

thanks

@Axeres

This comment has been minimized.

Copy link

commented Mar 29, 2017

Очень хорошая статья для начинающих. Очень круто и понятно расписано. Спасибо!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.