Skip to content

Instantly share code, notes, and snippets.

@rlemon
Created August 26, 2015 18:18
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save rlemon/577a3ad0bf471aaa2c8a to your computer and use it in GitHub Desktop.
var request = require('request'),
$ = require('cheerio');
// Command-line arguments: the room id is required, the output file is optional.
var roomid = process.argv[2];
if (!roomid) {
  throw new Error('you need to pass a roomid (and optionally an output file)');
}
// When no output file is given, the room id doubles as the file name.
var outputfile = process.argv[3] || roomid;

// Shared state read and updated by scrape() and finish().
var found = []; // starred-message records collected so far
var page = 1; // page counter, used for progress logging
var start = Date.now(); // start timestamp for the elapsed-time log
var siteRoot = 'http://chat.stackoverflow.com/rooms/info/' + roomid + '/';

console.log('scraping room#', roomid);
// Begin with the first page of the room's "stars" tab.
scrape(siteRoot + '?tab=stars');
/**
 * Fetches one page of the room's starred messages, appends a record for each
 * `.monologue` element to the shared `found` array, then follows the
 * `a[rel="next"]` link when one is present or calls `finish()` when none is.
 *
 * @param {string} url - Address of the page to fetch.
 */
function scrape(url) {
  console.log('scraping page ', page);
  request(url, function(err, res, html) {
    // A failed request yields no usable HTML. Report it and write out whatever
    // has been collected so far, rather than silently treating the failure as
    // the last page.
    if (err || !res || res.statusCode !== 200) {
      console.error('failed to fetch', url, err || ('HTTP status ' + (res && res.statusCode)));
      finish();
      return;
    }

    var root = $(html);
    var newUrl = root.find('a[rel="next"]').attr('href');

    root.find('.monologue').each(function(i, row) {
      var entry = $(row);
      found.push({
        id: entry.find('.message a').attr('name'),
        count: entry.find('.flash .stars .times').text(),
        username: entry.find('.username').text(),
        userid: entry.find('.username a').attr('href')
      });
    });

    if (newUrl) {
      page++;
      console.log('total time: ', (Date.now() - start) / 1000, 'seconds');
      scrape(siteRoot + newUrl);
    } else {
      finish();
    }
  });
}
/**
 * Serializes the collected `found` records as JSON and writes them to
 * `outputfile`.
 */
function finish() {
  var ws = require('fs').createWriteStream(outputfile);
  ws.on('error', function(err) {
    console.error(err);
  });
  // Announce success only after the stream has flushed everything; logging
  // right after end() would claim success even when the write fails.
  ws.on('finish', function() {
    console.log('found stars written to ', outputfile);
  });
  ws.end(JSON.stringify(found));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment