// Third-party and project dependencies. The relative order of the requires is
// kept as in the original file in case any of them has load-time side effects.
const axios = require("axios");
const axiosRetry = require("axios-retry");
const moment = require("moment");
const _ = require("lodash");
const Comment = require("./db/models/Comment.js");
const config = require("./config/config.js");
const mongoose = require("./db/mongoose.js");

// Crawl window, as epoch milliseconds. ISO 8601 date-time strings without a
// UTC offset are parsed as local time, which keeps the same meaning as the
// previous "Jan 1, 2015 00:00:00" / "July 27, 2018" literals while avoiding
// implementation-defined parsing of non-ISO date strings.
const startDate = new Date("2015-01-01T00:00:00").getTime();
const endDate = new Date("2018-07-27T00:00:00").getTime();

// Number of comments requested per API call.
const limit = 50;

// Attach axios-retry to the shared axios instance, configured for 3 retries.
axiosRetry(axios, { retries: 3 });
/**
 * Pages through Pushshift comment-search results for "elon musk" and stores
 * every page in the Comment collection.
 *
 * Starting from `startDate`, each iteration requests up to `limit` comments
 * created after the current cursor and before the module-level `endDate`,
 * inserts the mapped documents, and moves the cursor to the creation time of
 * the last comment on the page. Paging stops when the cursor reaches
 * `endDate`, when a page comes back empty, or when the cursor fails to
 * advance (which would otherwise loop forever).
 *
 * NOTE(review): advancing the cursor from the last comment of a page assumes
 * the API returns comments in ascending `created_utc` order — confirm.
 *
 * @param {number} startDate - Lower bound of the crawl, in epoch milliseconds.
 * @param {number} limit - Maximum number of comments requested per page.
 * @returns {Promise<void>} Resolves once paging stops; rejects if a request
 *   rejects. Insert failures are logged and do not stop the crawl.
 */
const fetchComments = async (startDate, limit) => {
  // Pushshift takes epoch seconds while Date works in milliseconds, hence the
  // conversions by 1000 in both directions below.
  const toEpochSeconds = ms => Math.floor(ms / 1000);

  console.log(`beginning comment fetch from ${startDate} to ${endDate}`);

  // Local cursor so the caller-supplied parameter is never reassigned.
  let cursor = startDate;

  while (cursor < endDate) {
    // Request the first `limit` comments created after the cursor.
    const url = [
      "https://apiv2.pushshift.io/reddit/search/comment/?q=elon%20musk",
      `size=${limit}`,
      `after=${toEpochSeconds(cursor)}`,
      `before=${toEpochSeconds(endDate)}`,
    ].join("&");
    const response = await axios.get(url);
    const comments = response.data && response.data.data;

    // Nothing (more) to store: stop instead of dereferencing a missing
    // first/last comment.
    if (!Array.isArray(comments) || comments.length === 0) {
      console.log("no comments returned, stopping comment fetch");
      break;
    }

    // Prepare the comments for the database.
    const commentDocuments = comments.map(comment => ({
      commentDate: moment.unix(comment.created_utc).format("MM/DD/YYYY"),
      subreddit: comment.subreddit,
      body: comment.body,
      score: comment.score,
    }));

    const firstCreatedMs = comments[0].created_utc * 1000;
    const lastCreatedMs = comments[comments.length - 1].created_utc * 1000;
    console.log(
      `insert (${new Date(firstCreatedMs)} -> ${new Date(lastCreatedMs)})`
    );

    // Best effort: a failed insert is reported but does not abort the crawl.
    try {
      await Comment.insertMany(commentDocuments);
    } catch (e) {
      console.error(e);
    }

    // Written as a negated ">" so a NaN timestamp also ends the loop rather
    // than requesting the same page again forever.
    if (!(lastCreatedMs > cursor)) {
      console.log("cursor did not advance, stopping comment fetch");
      break;
    }
    cursor = lastCreatedMs;
  }
};
// Start the crawl. A failed run is reported and flagged through the exit code
// instead of being left as an unhandled promise rejection.
fetchComments(startDate, limit).catch(err => {
  console.error(err);
  process.exitCode = 1;
});
// Scraped GitHub page footer, not part of the program: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment"