Skip to content

Instantly share code, notes, and snippets.

@JonnoFTW
Created August 19, 2019 00:52
Show Gist options
  • Save JonnoFTW/f59b02481e91e07ef5546def52294f62 to your computer and use it in GitHub Desktop.
Save JonnoFTW/f59b02481e91e07ef5546def52294f62 to your computer and use it in GitHub Desktop.
import re
import praw
import prawcore.exceptions
from collections import defaultdict
from itertools import product
from tqdm import tqdm
from pymongo import MongoClient
# Reddit API handle used by every request below.
# NOTE(review): the id/secret values look like placeholders — supply real
# credentials before running.
client = praw.Reddit(
    client_id='client id',
    client_secret='client secret',
    user_agent='gender scraper',
)

# Subreddits whose posts and comments are scanned for gender flair.
subs = ('askmen', 'askwomen')
# Maps a gender label ('male' / 'female') to the set of usernames that were
# seen carrying the corresponding flair.
users_of_type = defaultdict(set)

# Flair symbols recognised as gender markers. Built once at import time
# instead of on every add_author() call.
_FLAIR_GENDERS = {
    '♂': 'male',
    '♀': 'female',
}


def add_author(x):
    """Record the author of *x* under the gender shown by their flair.

    *x* is any object that may expose ``author`` (with a ``name``) and
    ``author_flair_text``. Objects lacking either attribute, or whose author
    or flair is empty/None, are ignored, as are flairs that are not one of
    the recognised symbols.

    Side effect: adds ``x.author.name`` to ``users_of_type[gender]``.
    """
    author = getattr(x, 'author', None)
    flair = getattr(x, 'author_flair_text', None)
    if not author or not flair:
        return
    # Strip surrounding whitespace so a padded flair such as ' ♂ ' still matches.
    gender = _FLAIR_GENDERS.get(flair.strip().lower())
    if gender is not None:
        users_of_type[gender].add(author.name)
# Walk the hot listing of every configured subreddit and register the flaired
# authors of each post and of the comments attached to it.
for sub in subs:
    print(f"Getting {sub}")
    hot_listing = client.subreddit(sub).hot(limit=1000)
    # tqdm wraps the listing to show a progress bar labelled with the subreddit.
    for submission in tqdm(hot_listing, desc=sub):
        add_author(submission)
        # NOTE(review): iterating `.comments` directly presumably yields only
        # the top-level entries of the comment tree — confirm against PRAW.
        for comment in submission.comments:
            add_author(comment)
# Persist the text written by every flaired user into the `text` collection of
# the `reddit_text` database on the default MongoDB instance.
mongo = MongoClient()
db = mongo['reddit_text']
coll = db['text']
for t, users in users_of_type.items():
    for user in users:
        # Any stored document for this author means an earlier run handled them.
        if coll.find_one({'author': user}):
            print(f"Already did {user}")
            continue
        try:
            user_things = client.redditor(user).hot(limit=1000)
            print(f"Starting {user}", end='... ')
            # Buffer the documents and write them in one go: if the listing
            # fails part-way, nothing is stored, so the "Already did" check
            # above never mistakes a partial scrape for a finished one.
            docs = []
            for thing in user_things:
                # A redditor listing mixes comments and submissions.
                # Submissions expose `selftext` rather than `body`, so fall
                # back to it instead of raising AttributeError.
                text = getattr(thing, 'body', None) or getattr(thing, 'selftext', None)
                if not text:
                    # Nothing to store (e.g. a link post with no self text).
                    continue
                docs.append({
                    'author': user,
                    'gender': t,
                    'text': text,
                })
            # insert_many rejects an empty list, so only call it with content.
            if docs:
                coll.insert_many(docs)
            print(f"Collected {user}")
        except (prawcore.exceptions.Forbidden, prawcore.exceptions.NotFound):
            # Forbidden (403) and NotFound (404) are returned for accounts
            # that can no longer be read; skip them rather than abort the run.
            print(f"Skipping {user}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment