Skip to content

Instantly share code, notes, and snippets.

@lucassimon
Created October 9, 2015 02:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lucassimon/8861dbba0a69e5633669 to your computer and use it in GitHub Desktop.
Save lucassimon/8861dbba0a69e5633669 to your computer and use it in GitHub Desktop.
var crawlerjs = require('crawler-js');
var mongo = require('mongodb');
var monk = require('monk');
var db = monk('localhost:27017/airport');
// Root of the crawled site; listing rows link to detail pages through
// site-relative hrefs, so this prefix is prepended to build absolute URLs.
var BASE_URL = 'http://www.aeroportosdobrasil.com';

/**
 * Builds the crawler-js configuration used to scrape one airport detail page.
 *
 * For every row matched by the selector, the extractor reads the text of the
 * first <dd> inside the first <td>, stores it under `iata`, and inserts the
 * resulting document into the `aeroportos_br` collection.
 *
 * @param {string} url - Absolute URL of the detail page to fetch.
 * @returns {Object} A crawler-js configuration object for that page.
 */
function buildDetailCrawler(url) {
  return {
    interval: 1000,
    get: url,
    preview: 0,
    extractors: [
      {
        selector: '.table > tbody tr',
        callback: function (err, html) {
          if (err) {
            console.log(err);
            return;
          }
          // Declared locally: the original leaked `data` as an implicit global.
          var data = {};
          data.iata = html.children('td').eq(0).children('dd').eq(0).text();
          console.log(data);
          var collection = db.get('aeroportos_br');
          // Pass a callback so a failed write is reported instead of vanishing.
          collection.insert(data, function (insertErr) {
            if (insertErr) {
              console.log(insertErr);
            }
          });
        }
      }
    ]
  };
}

/**
 * Top-level crawler-js configuration: walks listing pages 1..33 and, for each
 * table row, starts a second crawl of the airport detail page linked from the
 * row's first cell.
 *
 * Note: an earlier revision stored the listing columns directly instead of
 * following the link (td 0..6 = nome, iata, icao, municipio, estado,
 * telefone, endereço).
 */
var crawler = {
  interval: 1000,
  getSample: 'http://www.aeroportosdobrasil.com/aeroportos/listagem/1',
  get: 'http://www.aeroportosdobrasil.com/aeroportos/listagem/[numbers:1:33:1]',
  preview: 0,
  extractors: [
    {
      selector: '.table > tbody tr',
      callback: function (err, html) {
        if (err) {
          console.log(err);
          return;
        }
        var link = html.children('td').eq(0).children('a').attr('href');
        if (!link) {
          // Rows without an anchor would otherwise produce "...comundefined".
          return;
        }
        var url = BASE_URL + link;
        console.log(url);
        // The original called an undefined `next()` here, which threw a
        // ReferenceError after every row; the extractor callback receives no
        // such continuation, so the call is dropped.
        crawlerjs(buildDetailCrawler(url));
      }
    }
  ]
};
// var config = {
// mongoDB: 'airports',
// mongoDBHost: 'localhost',
// mongoDBPort: '27017'
// }
// Kick off the crawl using the listing-page configuration defined above.
crawlerjs(crawler);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment