Skip to content

Instantly share code, notes, and snippets.

@scutdavy
Created November 11, 2013 16:35
Show Gist options
  • Save scutdavy/7416123 to your computer and use it in GitHub Desktop.
Save scutdavy/7416123 to your computer and use it in GitHub Desktop.
搜房网（上海）二手房数据定向爬虫
var needle = require("needle");
var cheerio = require('cheerio');
var assert = require('assert');
var async = require('async');
var fs = require('fs');
/**
 * Builds a plain record describing one listing from its wrapped DOM node.
 *
 * @param {Object} $ - Cheerio-style selection wrapping a single listing
 *   element (a selection, not the document-level `$` function).
 * @returns {Object} A new object with string fields `title`, `houseUrl`,
 *   `money`, `area`, `desc`, `comName`, `comRef` and `comAddress`.
 */
var itemWith = function($){
    // Hrefs are prefixed with the site host to form the stored URLs.
    var siteRoot = 'http://esf.sh.soufun.com';

    // Declared locally: assigning to an undeclared `item` would create an
    // implicit global shared by every call (and throws in strict mode).
    var item = {};

    // The title link lives in the first child of the `.house` element.
    var $titleLink = $.find('.house').children().first().find('a');
    item.title = $titleLink.text();
    // NOTE(review): a missing href concatenates as the text "undefined".
    item.houseUrl = siteRoot + $titleLink.attr('href');

    item.money = $.find('.money').children().first().text();
    item.area = $.find('.area').text();

    var $paragraphs = $.find('p');
    item.desc = $paragraphs.eq(2).text();

    // The second <p> supplies the com* fields (presumably the residential
    // community — confirm against the page markup).
    var $com = $paragraphs.eq(1);
    var $comLink = $com.find('a');
    item.comName = $comLink.find('span').text();
    item.comRef = siteRoot + $comLink.attr('href');
    // Text of every <span> under that paragraph, including any nested in the link.
    item.comAddress = $com.find('span').text();

    return item;
};
/**
 * Extracts one record per listing from a loaded results page.
 *
 * Side effect: removes the last child of `.list_pic` from the DOM before
 * mapping (presumably a non-listing trailer element — confirm against the
 * page markup).
 *
 * @param {Function} $ - Document-level cheerio function for the page.
 * @returns {Array} Plain array of the records produced by `itemWith`, one
 *   per remaining child of `.list_pic`.
 */
var pageItemsWithDOM = function($){
    var $list = $('.list_pic');
    $list.children().last().remove();

    var mapped = $list.children().map(function(){
        return itemWith($(this));
    });

    // Current cheerio returns a wrapper from .map(); unwrap it so callers
    // can concat/serialize a real array. A plain array (as returned by
    // older cheerio versions) is passed through untouched.
    return typeof mapped.get === 'function' ? mapped.get() : mapped;
};
/**
 * Fetches one results page and hands its extracted items to `callback`.
 *
 * @param {string} url - Page URL to request; it is logged before the request.
 * @param {Function} callback - Called exactly once per request: with the
 *   value returned by `pageItemsWithDOM` on an HTTP 200 response, or with
 *   no arguments when the request errors or returns any other status.
 */
var load = function(url, callback){
    console.log(url);
    needle.get(url, function(err, resp, body){
        if (err || !resp || resp.statusCode !== 200){
            // Without a transport error `err` is null, so report the
            // unexpected status instead of logging a bare "null".
            console.log(err || ('Unexpected HTTP status ' + (resp && resp.statusCode) + ' for ' + url));
            callback();
            return;
        }
        callback(pageItemsWithDOM(cheerio.load(body)));
    });
};
/**
 * Script entry point: loads result pages 1 through 100 one after another,
 * accumulates the items of every page, and writes them to `items.json`
 * as 4-space-indented JSON once the loop ends.
 */
(function(){
    var baseUrl = 'http://esf.sh.soufun.com/house/3';
    var lastPage = 100;
    // The page index is appended directly to the base, e.g. ".../house/31/".
    var urlForIndex = function(index){
        return baseUrl + index + "/";
    };
    var items = [];
    var index = 1;
    async.whilst(
        function () { return index <= lastPage; },
        function (callback) {
            load(urlForIndex(index), function(pageItems){
                // A failed page yields no value; skip it so `undefined`
                // does not end up as a `null` entry in the output file.
                if (pageItems) {
                    items = items.concat(pageItems);
                }
                console.log(pageItems);
                // Advance only once this page has finished, so the loop
                // test always sees a counter that matches completed work.
                index++;
                callback(null, index);
            });
        },
        function (err) {
            if (err) {
                console.log(err);
            }
            console.log("complete");
            fs.writeFile('items.json', JSON.stringify(items, null, 4), function(error){
                if (error) {
                    // Report write failures instead of dropping them silently.
                    console.log(error);
                    return;
                }
                console.log("write complete");
            });
        }
    );
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment