public
Created

Gist that mines all reddits (subreddits) from Reddit.com using Node.js and MongoDB. Complies with Reddit's policy of at most 1 request per 2 seconds.

  • Download Gist
RedditsMine
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
// Dependencies: the MongoDB driver (Server/Db constructors) and Node's core HTTP client.
// All four names are declared in a single `var` statement so that `http` is a
// proper local binding rather than an implicit global, and the statement is
// terminated before the next `var` begins.
var mongo = require('mongodb'),
    Server = mongo.Server,
    Db = mongo.Db,
    http = require('http');
 
// Connection to the MongoDB server on localhost:27017, using the 'redditDb' database.
var server = new Server('localhost', 27017, {auto_reconnect: true});
var db = new Db('redditDb', server);

// Request options for the first page of the listing. Follow-up pages use a
// copy of these options whose path carries an `after` cursor.
var options = {
  host: 'www.reddit.com',
  path: '/reddits.json'
};

/**
 * Fetches the listing page at `options.path`, inserts the `data` object of
 * every child in the response into the 'reddits' collection, and, while the
 * response carries an `after` cursor, schedules a request for the next page
 * 2 seconds later.
 *
 * Failures (request error, non-200 status, unparsable body, collection or
 * insert error) are logged to the console; none of them is thrown to the caller.
 *
 * @param {Object} options - Options handed to `http.request` (host, path, ...).
 *   The object is not modified; the next page is requested with a shallow copy.
 */
var makeRequest = function(options) {
  var callback = function(response) {
    var str = '';

    if (response.statusCode !== 200) {
      console.log('Unexpected status code ' + response.statusCode + ' for ' + options.path);
      // Drain the body so the underlying socket is released.
      response.resume();
      return;
    }

    // Decode at the stream level: converting each Buffer chunk to a string on
    // its own can corrupt a multi-byte character split across two chunks.
    response.setEncoding('utf8');

    response.on('data', function(chunk) {
      str += chunk;
    });

    response.on('end', function() {
      try {
        var result = JSON.parse(str),
            childData = result && result.data && result.data.children;

        if (childData) {
          db.collection('reddits', function(err, collection) {
            if (err) {
              console.log(err);
              console.log('Failed to open reddits collection');
              return;
            }
            for (var i = 0; i < childData.length; ++i) {
              collection.insert(childData[i].data, {safe: true}, function(err, result) {
                if (!err) {
                  console.log('Child inserted');
                } else {
                  console.log('Failed to insert child');
                }
              });
            }
          });
        }

        var after = result && result.data && result.data.after;
        if (after) {
          // Build the next request from a shallow copy so the caller's
          // options object is left untouched.
          var nextOptions = {};
          Object.keys(options).forEach(function(key) {
            nextOptions[key] = options[key];
          });
          nextOptions.path = '/reddits.json?after=' + encodeURIComponent(after);

          // Wait 2 seconds between requests.
          setTimeout(function() {
            makeRequest(nextOptions);
          }, 2000);
        } else {
          console.log('After is not defined');
        }
      } catch (e) {
        console.log(e);
        console.log('Failed to process response');
      }
    });
  };

  console.log('Requesting ' + options.path);
  var request = http.request(options, callback);
  // Without a listener, a connection-level failure would surface as an
  // unhandled 'error' event and terminate the process.
  request.on('error', function(err) {
    console.log(err);
    console.log('Request failed for ' + options.path);
  });
  request.end();
};
 
// Open the database connection, then start collecting from the first listing page.
// A connection failure is reported instead of being silently ignored.
db.open(function(err) {
  if (err) {
    console.log(err);
    console.log('Failed to connect to the database');
    return;
  }
  console.log("We are connected. Collecting data");
  makeRequest(options);
});

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.