Skip to content

Instantly share code, notes, and snippets.

@itorr
Last active January 6, 2017 10:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save itorr/15ec937cbe45b8a69d9da35d4cdeaa2d to your computer and use it in GitHub Desktop.
Save itorr/15ec937cbe45b8a69d9da35d4cdeaa2d to your computer and use it in GitHub Desktop.
v2ex 谢意统计
/**
 * Paginated crawler that tallies "thanks received" (收到谢意) and
 * "thanks sent" (发送谢意) rows found in the HTML of a balance listing.
 *
 * Totals are kept in `this.result`: the `all` bucket holds overall counts and
 * one extra bucket per four-digit year (e.g. `result['2016']`) is created
 * lazily as rows dated in that year are encountered.
 *
 * @param {string} startUrl URL prefix; the current page number is appended
 *     to it to form each request URL.
 */
var Urls = function (startUrl) {
  this.startUrl = startUrl;
  // NOTE(review): paging starts at 0 — confirm the site serves p=0 and p=1 as
  // different pages, otherwise the first page would be tallied twice.
  this.page = 0;
  this.result = {
    all: {
      收到谢意: 0,
      发送谢意: 0
    }
  };
};

/**
 * Fetches the current page with jQuery's `$.get`, counts the thanks rows in
 * it, and then requests the next page from inside the success callback.
 *
 * Crawling ends (and the accumulated result is logged) when a page contains
 * the "initial capital" marker (获得初始资本), or when a page contains no
 * table rows at all, which would otherwise make the crawl loop forever.
 *
 * @param {number} [year] Accepted and forwarded to the follow-up request, but
 *     not used for filtering.
 * @returns {undefined}
 */
Urls.prototype.crawl = function (year) {
  var self = this;
  var url = this.startUrl + this.page;

  $.get(url, function (data) {
    self.page += 1;

    // String.prototype.match yields null (not an empty array) on no match.
    var rows = data.match(/<tr[\s\S]+?<\/tr>/g) || [];

    rows.forEach(function (tr) {
      var kindMatch = tr.match(/收到谢意|发送谢意/);
      if (!kindMatch) {
        return; // not a thanks row
      }
      var kind = kindMatch[0];
      self.result.all[kind] += 1;

      // A thanks row without a recognisable date still counts toward the
      // overall total; it just cannot be attributed to a year.
      var dateMatch = tr.match(/(20\d\d)-\d\d-\d\d/);
      if (!dateMatch) {
        return;
      }
      var rowYear = dateMatch[1];
      if (!self.result[rowYear]) {
        self.result[rowYear] = {
          收到谢意: 0,
          发送谢意: 0
        };
      }
      self.result[rowYear][kind] += 1;
    });

    // Stop on the last-page marker, or on a page with no rows to avoid
    // requesting pages indefinitely.
    if (rows.length === 0 || data.match(/获得初始资本/)) {
      console.log('统计结果', self.result);
      return;
    }

    console.log('第' + self.page + '页');
    self.crawl(year);
  });
};
// Entry point: build a crawler for the v2ex balance listing and start crawling.
// The 2016 argument is received by crawl() as `year`; the crawl implementation
// in this file only forwards it to the next request and does not filter by it.
new Urls('https://www.v2ex.com/balance?p=').crawl(2016);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment