Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Reddit Scraper to Get All Text Between Dates
import praw
from psaw import PushshiftAPI
import datetime as dt
# Dump the self-text of submissions from one subreddit, posted between two
# dates, into "<subreddit>.txt".
#
# The date range is cut into equal time windows and each window is queried
# separately with a per-call result limit, so the collected posts are spread
# across the whole range rather than clustered at one end.
api = PushshiftAPI()
posts_per_call = 100

# class of 2020: Aug 2019 to June 2020
# class of 2021: Aug 2020 to June 2021
start_epoch = int(dt.datetime(2019, 8, 1).timestamp())
end_epoch = int(dt.datetime(2020, 6, 1).timestamp())

# Window length in whole seconds, derived from the actual range instead of a
# hard-coded total so that changing the dates above keeps the windows right.
# The range is split into `posts_per_call` windows (as the original script
# did); max(1, ...) guarantees the loop below always advances.
seconds_between_calls = max(1, (end_epoch - start_epoch) // posts_per_call)

sub = 'sat'
# Placeholder bodies marking posts whose text is no longer available.
skipped_bodies = ("[deleted]", "[removed]")

print("Started work on " + sub)
# `with` closes the file even if a request fails part-way through; explicit
# UTF-8 avoids encode errors from the platform default encoding.
with open(sub + ".txt", "w", encoding="utf-8") as f:
    window_start = start_epoch
    while window_start < end_epoch:
        # Never query past the requested end date.
        window_end = min(window_start + seconds_between_calls, end_epoch)
        results = api.search_submissions(after=window_start,
                                         before=window_end,
                                         subreddit=sub,
                                         filter=['selftext'],
                                         limit=posts_per_call)
        window_start = window_end
        for post in results:
            try:
                body = post.d_['selftext']
                if body not in skipped_bodies:
                    # NOTE: bodies are written back-to-back with no separator.
                    f.write(body)
            except (KeyError, TypeError, UnicodeError):
                # Best effort: skip posts with no 'selftext' entry, a
                # non-string body, or text that cannot be encoded.
                print("Error: moving on")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment