Skip to content

Instantly share code, notes, and snippets.

@anthonytxie
Created July 31, 2018 07:16
Show Gist options
  • Save anthonytxie/3884d98084fcd3fab6e0cff910e8aa2e to your computer and use it in GitHub Desktop.
Save anthonytxie/3884d98084fcd3fab6e0cff910e8aa2e to your computer and use it in GitHub Desktop.
// Third-party dependencies.
const axios = require("axios");
const axiosRetry = require("axios-retry");
const moment = require("moment");
const _ = require("lodash");
// Project model the fetched comments are written to (via insertMany).
const Comment = require("./db/models/Comment.js");
// Fetch window, expressed in epoch milliseconds (Date#getTime).
// NOTE(review): these are non-ISO date strings, so Date parses them in the
// machine's local time zone and the exact format support is
// implementation-dependent — confirm that is intended before relying on the
// window boundaries.
const startDate = new Date("Jan 1, 2015 00:00:00").getTime();
const endDate = new Date("July 27, 2018").getTime();
// Neither binding is referenced in this file; presumably they are loaded for
// their side effects (e.g. opening the database connection) — TODO confirm.
const config = require("./config/config.js");
const mongoose = require("./db/mongoose.js");
// Number of comments requested per API call.
const limit = 50;
// Wire axios-retry into the shared axios instance, allowing up to 3 retries.
axiosRetry(axios, { retries: 3 });
/**
 * Pages through pushshift comment-search results for "elon musk" and stores
 * every page in the Comment collection.
 *
 * Each request asks for up to `limit` comments created after the current
 * cursor and before the module-level `endDate`. After a page is stored, the
 * cursor moves to the `created_utc` of the last comment in that page. The loop
 * ends when the cursor reaches `endDate`, when a page comes back empty, or
 * when a page fails to move the cursor forward.
 *
 * @param {number} startDate - Lower bound of the fetch window, in epoch milliseconds.
 * @param {number} limit - Page size sent as the `size` query parameter.
 * @returns {Promise<void>} Resolves once the window has been exhausted.
 *   Rejects with whatever `axios.get` rejects with; insert failures are
 *   logged and do not stop the run.
 */
const fetchComments = async (startDate, limit) => {
  console.log(`beginning comment fetch from ${startDate} to ${endDate}`);

  // pushshift works in epoch seconds while Date works in milliseconds.
  // Dividing (rather than slicing the first 10 digits of the string) stays
  // correct for timestamps that are not exactly 13 digits long.
  const toEpochSeconds = ms => Math.floor(ms / 1000);
  const before = toEpochSeconds(endDate);

  // Local cursor so the caller-supplied argument is never reassigned.
  let cursor = startDate;

  while (cursor < endDate) {
    // Request the first `limit` comments after the cursor.
    const query = `q=elon%20musk&size=${limit}&after=${toEpochSeconds(cursor)}&before=${before}`;
    const response = await axios.get(
      `https://apiv2.pushshift.io/reddit/search/comment/?${query}`
    );
    const comments = response.data && response.data.data;

    // An empty page means the window is exhausted; without this check the
    // first/last lookups below would throw on `undefined`.
    if (!Array.isArray(comments) || comments.length === 0) {
      console.log("no more comments returned, stopping");
      break;
    }

    // Shape the API payload into the documents stored in the database.
    const commentDocuments = comments.map(comment => ({
      commentDate: moment.unix(comment.created_utc).format("MM/DD/YYYY"),
      subreddit: comment.subreddit,
      body: comment.body,
      score: comment.score,
    }));

    const firstMs = comments[0].created_utc * 1000;
    const lastMs = comments[comments.length - 1].created_utc * 1000;
    console.log(`insert (${new Date(firstMs)} -> ${new Date(lastMs)})`);

    try {
      await Comment.insertMany(commentDocuments);
    } catch (e) {
      // Best effort, as before: report the failed batch and keep going.
      console.error("failed to insert comment batch", e);
    }

    // Stop instead of looping forever if the page did not move the cursor
    // forward (this also covers a missing/invalid created_utc, i.e. NaN).
    if (!(lastMs > cursor)) {
      console.log("cursor did not advance, stopping");
      break;
    }
    cursor = lastMs;
  }
};
// Start the scrape. The promise is handled explicitly so a failed request
// is reported and reflected in the exit code rather than surfacing as an
// unhandled rejection.
fetchComments(startDate, limit).catch(err => {
  console.error("comment fetch failed", err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment