@ty-porter · Created March 14, 2020 18:32
Reddit bot that generates a CSV report of stale (inactive) subreddit moderators.

from bs4 import BeautifulSoup
import csv
import datetime
import praw
import requests
import traceback
import sys

BOT_USERNAME = 'BOT_USERNAME'
BOT_PASSWORD = 'BOT_PASSWORD'
BOT_CLIENT_ID = 'BOT_CLIENT_ID'
BOT_CLIENT_SECRET = 'BOT_CLIENT_SECRET'
BOT_USER_AGENT = 'BOT_USER_AGENT'
REDDITLIST_URL = 'http://redditlist.com/all?page='
# Edit these as needed to avoid timeouts
HI_LIMIT = 15000 # subscribers
LO_LIMIT = 10000 # subscribers
START_PAGE = 35 # Start page for redditlist.com lookup
ACTIVITY_LIMIT = 60 # days

class Bot():
    def __init__(self):
        self.reddit = praw.Reddit(username=BOT_USERNAME,
                                  password=BOT_PASSWORD,
                                  client_id=BOT_CLIENT_ID,
                                  client_secret=BOT_CLIENT_SECRET,
                                  user_agent=BOT_USER_AGENT)
        self.subreddits = []
        # Moderators with no comments or submissions since this UTC timestamp count as inactive
        self.date_cutoff = datetime.datetime.utcnow().timestamp() - (ACTIVITY_LIMIT * 24 * 60 * 60)

    def scrape_subreddits(self):
        print('Pulling data from redditlist.com...\n')
        page = START_PAGE
        low_subscriber_count = HI_LIMIT
        while low_subscriber_count >= LO_LIMIT:
            response = requests.get(REDDITLIST_URL + str(page))
            parser = BeautifulSoup(response.content, 'lxml-html')
            subreddits = parser.find_all('div', class_='span4 listing')[1].find_all('div', class_='listing-item')
            for subreddit in subreddits:
                name = subreddit.find('span', class_='subreddit-url').a.text
                subscriber_string = subreddit.find('span', class_='listing-stat').text
                subscribers = self.subscribers_to_int(subscriber_string)
                if subscribers < low_subscriber_count:
                    low_subscriber_count = subscribers
                if subscribers > HI_LIMIT or subscribers < LO_LIMIT:
                    continue
                else:
                    subreddit_object = {
                        'name': name,
                        'subscribers': subscribers
                    }
                    self.subreddits.append(subreddit_object)
            page += 1

    def subscribers_to_int(self, subscribers):
        return int(''.join(subscribers.split(',')))

    def botcode(self):
        self.scrape_subreddits()
        if len(self.subreddits) > 50:
            print('WARNING! You are attempting to crawl through {} subreddits.'.format(len(self.subreddits)),
                  'Recommended limit for this is 50 subreddits.',
                  'Consider a tighter search to limit API load.\n',
                  sep='\n')
        print('Searching for subreddit moderators...\n')
        filename = 'stalemods-{}.csv'.format(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        with open(filename, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['name', 'subscribers', 'total_mods', 'active_mods', 'inactive_mods', 'subreddit_type'])
            for subreddit in self.subreddits:
                total_mods = 0
                inactive_mods = 0
                active_mods = 0
                for moderator in self.reddit.subreddit(subreddit['name']).moderator():
                    total_mods += 1
                    if self.is_active(moderator):
                        active_mods += 1
                    else:
                        inactive_mods += 1
                subreddit_type = 'nsfw' if self.reddit.subreddit(subreddit['name']).over18 else 'sfw'
                row = [subreddit['name'], subreddit['subscribers'], total_mods, active_mods, inactive_mods, subreddit_type]
                writer.writerow(row)
        print('Finished! Check {} for results.'.format(filename))

    def is_active(self, moderator):
        # A moderator is active if their most recent comment or submission is newer than the cutoff
        redditor = self.reddit.redditor(moderator.name)
        for comment in redditor.comments.new(limit=1):
            if comment.created_utc > self.date_cutoff:
                return True
            break
        for submission in redditor.submissions.new(limit=1):
            if submission.created_utc > self.date_cutoff:
                return True
            break
        return False

if __name__ == '__main__':
    try:
        Bot().botcode()
    except KeyboardInterrupt:
        sys.exit(0)
    except Exception:
        traceback.print_exc()
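
After filling in the BOT_* placeholders with your Reddit app credentials and running the script, the generated CSV can be post-processed with the standard library. A minimal, separate sketch (assuming a hypothetical output filename; substitute whatever file the bot actually produced) that lists subreddits with the most inactive moderators first:

    import csv

    # Hypothetical filename for illustration; the bot names its output stalemods-<timestamp>.csv
    FILENAME = 'stalemods-20200314183200.csv'

    with open(FILENAME, newline='') as file:
        rows = list(csv.DictReader(file))

    # Sort by inactive moderator count, highest first, and print a short summary per subreddit
    rows.sort(key=lambda row: int(row['inactive_mods']), reverse=True)
    for row in rows:
        print('{name}: {inactive_mods}/{total_mods} mods inactive ({subscribers} subscribers, {subreddit_type})'.format(**row))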