-
-
Save itorr/15ec937cbe45b8a69d9da35d4cdeaa2d to your computer and use it in GitHub Desktop.
v2ex 谢意统计
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Crawler that tallies "thanks received" (收到谢意) and "thanks sent" (发送谢意)
 * rows found in a sequence of paginated HTML pages.
 *
 * @constructor
 * @param {string} startUrl - URL prefix; the current page number is appended
 *     to it to build each request URL.
 */
var Urls = function(startUrl){
  this.startUrl = startUrl;
  // Page number appended to startUrl for the next request.
  // NOTE(review): the first request uses page 0 — confirm the server does not
  // serve the same rows for p=0 and p=1, which would double count them.
  this.page = 0;
  // Counters: the 'all' bucket plus one bucket per four-digit year, created
  // lazily by crawl() the first time a row from that year is seen.
  this.result = {
    all: {
      收到谢意: 0,
      发送谢意: 0
    }
  };
};

/**
 * Fetches the current page with `$.get`, counts the thanks rows it contains
 * into `this.result`, then requests the next page. The crawl ends, and the
 * tally is logged, when a page contains the text "获得初始资本" or contains
 * no `<tr>` rows at all.
 *
 * @param {number} [year] - Currently unused by the counting logic; it is only
 *     forwarded to the recursive call. Kept for interface compatibility.
 * @returns {undefined}
 */
Urls.prototype.crawl = function(year){
  var self = this;
  var url = this.startUrl + this.page;

  var request = $.get(url, function(data){
    self.page += 1;

    // String.prototype.match returns null (not an empty array) when nothing
    // matches, so fall back to [] to keep a row-less page from throwing.
    var rows = data.match(/<tr[\s\S]+?<\/tr>/g) || [];

    rows.forEach(function(tr){
      var 类型 = tr.match(/收到谢意|发送谢意/);
      if(!类型){
        return;
      }
      类型 = 类型[0];
      self.result['all'][类型] += 1;

      // A thanks row without a recognisable date is still counted in 'all',
      // but cannot be assigned to a year bucket.
      var 年份 = tr.match(/(20\d\d)-\d\d-\d\d/);
      if(!年份){
        return;
      }
      年份 = 年份[1];
      if(!self.result[年份]){
        self.result[年份] = {
          收到谢意: 0,
          发送谢意: 0
        };
      }
      self.result[年份][类型] += 1;
    });

    // Stop on the end marker, or on a page with no rows at all: without the
    // second condition a page lacking the marker would be re-requested forever.
    if(rows.length === 0 || data.match(/获得初始资本/)){
      return console.log('统计结果',self.result);
    }
    console.log('第'+self.page+'页');
    self.crawl(year);
  });

  // Report a failed request instead of stopping silently. Guarded so that a
  // `$` whose get() does not return a jqXHR-like object still works.
  if(request && typeof request.fail === 'function'){
    request.fail(function(){
      console.error('request failed: ' + url, self.result);
    });
  }
};
// Start crawling the V2EX balance pages. The argument is forwarded to crawl(),
// which currently does not use it for filtering.
new Urls('https://www.v2ex.com/balance?p=').crawl(2016);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment