Skip to content

Instantly share code, notes, and snippets.

@MattSandy
Created October 27, 2016 16:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MattSandy/7b45b29c56b0ec81eb1b3df32a1e0cab to your computer and use it in GitHub Desktop.
Save MattSandy/7b45b29c56b0ec81eb1b3df32a1e0cab to your computer and use it in GitHub Desktop.
Reddit Front Page Monitor
var http = require('http');
var https = require('https');
var fs = require('fs');
// In-memory de-duplication state shared by scrape_hot and scrape_user:
// IDs of posts, and names of authors, that have already been handled.
var post_array = [];
var user_array = [];

// Start every run with CSV files that contain only the header row
// (fs.writeFile replaces the file if it already exists).
fs.writeFile('posts.csv', 'Author,ID,Post Date,Comments,Score,Stickied,Pull,Subreddit\n', function (err) {
    if (err) {
        console.log('Could not reset posts.csv:', err);
        return;
    }
    console.log('Cleared posts.csv');
});
fs.writeFile('users.csv', 'Author,Author Date\n', function (err) {
    if (err) {
        console.log('Could not reset users.csv:', err);
        return;
    }
    console.log('Cleared users.csv');
});

// Subreddits whose hot listing is monitored.
var subreddits = ["all"];

// Kick off one crawl per subreddit after a one second delay. The name is
// handed to the timer callback as an argument: a `var` declared inside a
// `for` loop is shared by all iterations, so a closure over it would see
// only the last entry once the timers fire.
for (var i = 0; i < subreddits.length; i++) {
    setTimeout(function (name) { scrape_hot("", 1, name); }, 1000, subreddits[i]);
}
/**
 * Fetch one page of a subreddit's listing, append every post that has not
 * been seen before to posts.csv, and schedule the next request.
 *
 * Pages are followed through the `after` cursor found in each response.
 * After 40 pages, or when a response carries no cursor, the crawl starts
 * over from the first page 20 minutes later. A failed request or a response
 * that cannot be processed is retried after a short pause.
 *
 * @param {string} after - cursor taken from the previous page; "" requests the first page.
 * @param {number} page - 1-based counter of the page being requested.
 * @param {string} subreddit - subreddit name as it appears after "/r/" in the URL.
 */
function scrape_hot(after, page, subreddit) {
    var MAX_PAGES = 40;
    var RESTART_DELAY_MS = 1000 * 60 * 20;
    var RETRY_DELAY_MS = 5000;
    var retry_scheduled = false;

    // Repeat this exact request later. The pause keeps a persistent failure
    // from turning into a tight request loop, and the flag makes sure one
    // failed request schedules at most one retry.
    function retry(reason) {
        if (retry_scheduled) {
            return;
        }
        retry_scheduled = true;
        console.log(reason);
        setTimeout(function () { scrape_hot(after, page, subreddit); }, RETRY_DELAY_MS);
    }

    // NOTE(review): no User-Agent header is sent with this request; check
    // whether Reddit's API rules require one for this endpoint.
    var url = "https://www.reddit.com/r/" + subreddit + "/.json?after=" + after;
    https.get(url, function (res) {
        var body = '';
        res.on('data', function (chunk) {
            body += chunk;
        });
        res.on('end', function () {
            try {
                var response = JSON.parse(body);
                var posts = response.data.children;
                for (var i = 0; i < posts.length; i++) {
                    var post = posts[i].data;
                    // Skip posts that were already logged.
                    if (post_array.indexOf(post.id) !== -1) {
                        continue;
                    }
                    post_array.push(post.id);
                    // Column order matches the header row of posts.csv.
                    var line = post.author + ',' + post.id + ',' +
                        format_date(post.created) + ',' + post.num_comments + ',' +
                        post.score + ',' + post.stickied + ',' +
                        'hot,' + post.subreddit + "\n";
                    console.log(line);
                    // Record the author's account date as well.
                    scrape_user(post.author);
                    fs.appendFile('posts.csv', line, function (err) {
                        if (err) {
                            console.log('Could not append to posts.csv:', err);
                        }
                    });
                }
                var next = response.data.after;
                if (page < MAX_PAGES && next) {
                    scrape_hot(next, page + 1, subreddit);
                } else {
                    // Page limit reached or no cursor for a further page:
                    // begin a new cycle from the first page later on.
                    setTimeout(function () { scrape_hot("", 1, subreddit); }, RESTART_DELAY_MS);
                }
            } catch (err) {
                // Covers invalid JSON as well as bodies without the expected
                // data.children structure.
                retry(err);
            }
        });
    }).on('error', function (e) {
        retry(e);
    });
}
/**
 * Look up an author's account creation date and append "name,date" to
 * users.csv. Each author is looked up at most once per run; later calls
 * with the same name return without doing anything.
 *
 * A lookup that fails (request error, invalid JSON, or a body without
 * data.created) is repeated after a pause, up to MAX_ATTEMPTS tries in
 * total, and then abandoned with a log message.
 *
 * @param {string} user - author name as found in a listing entry.
 */
function scrape_user(user) {
    var MAX_ATTEMPTS = 3;
    var RETRY_DELAY_MS = 5000;

    // Only the first sighting of an author triggers a lookup.
    if (user_array.indexOf(user) !== -1) {
        return;
    }
    user_array.push(user);

    // Encode the name so characters outside the URL-safe set cannot break the path.
    var url = "https://www.reddit.com/user/" + encodeURIComponent(user) + "/about.json";

    // Retries go through attempt() instead of scrape_user(): the author is
    // already recorded in user_array, so calling scrape_user again would
    // return immediately without issuing a request.
    function attempt(number) {
        var failed = false;

        // Schedule the next try, or give up once the limit is reached.
        function fail(reason) {
            if (failed) {
                return;
            }
            failed = true;
            if (number >= MAX_ATTEMPTS) {
                console.log("Giving up on user " + user + " after " + number + " attempts: ", reason);
                return;
            }
            setTimeout(function () { attempt(number + 1); }, RETRY_DELAY_MS);
        }

        https.get(url, function (res) {
            var body = '';
            res.on('data', function (chunk) {
                body += chunk;
            });
            res.on('end', function () {
                var line;
                try {
                    var response = JSON.parse(body);
                    line = user + "," + format_date(response.data.created) + "\n";
                } catch (err) {
                    fail(err);
                    return;
                }
                fs.appendFile('users.csv', line, function (err) {
                    if (err) {
                        console.log('Could not append to users.csv:', err);
                    }
                });
            });
        }).on('error', function (e) {
            console.log("Got an error: ", e);
            fail(e);
        });
    }

    attempt(1);
}
/**
 * Convert a Unix timestamp in seconds into a "YYYY-MM-DD" string, using the
 * local time zone of the running process.
 *
 * @param {number} date - seconds since the Unix epoch.
 * @returns {string} year, two-character month and two-character day joined by "-".
 */
function format_date(date) {
    // Prefix single-character values with one zero; longer values pass through.
    function two_chars(value) {
        var text = value.toString();
        return text.length < 2 ? "0" + text : text;
    }

    var when = new Date(date * 1000);
    var pieces = [
        when.getFullYear().toString(),
        two_chars(when.getMonth() + 1), // getMonth() counts from zero
        two_chars(when.getDate())
    ];
    return pieces.join("-");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment