# Source: GitHub Gist by @HebeHH (HebeHH/d4036d866f48c8294ab31be99223e756),
# created August 3, 2018 15:27.
import praw
import re
import pandas as pd
# Capitalised words that are too generic to be useful search terms.
STOP_WORDS = (
    'In', 'The', 'Man', 'New', 'What', 'My', 'This', 'Woman', 'Best', 'Why',
    'How', 'You', 'Is', 'Part', 'To', 'After', 'First', 'No', 'Boy',
)

# One capital letter followed by one or more lowercase letters.
PROPER_NOUN = re.compile(r'[A-Z][a-z]+')

# Minimum score a submission needs in order to be reported.
MIN_SCORE = 100

# How many of the most frequent proper nouns are used as search terms.
TOP_TERMS = 10


def fetch_submissions(reddit, subreddit="News", limit=None):
    """Return the newest submissions of *subreddit* as a DataFrame.

    reddit    -- an authenticated ``praw.Reddit`` instance.
    subreddit -- name of the subreddit to read.
    limit     -- passed straight to ``.new()``; the default ``None`` places
                 no explicit cap on the number of submissions requested.

    The result has one row per submission and the columns ``titles`` and
    ``scores``.
    """
    rows = [
        [submission.title, submission.score]
        for submission in reddit.subreddit(subreddit).new(limit=limit)
    ]
    return pd.DataFrame(rows, columns=['titles', 'scores'])


def most_common_proper_nouns(titles, stop_words=STOP_WORDS, count=TOP_TERMS):
    """Return up to *count* of the most frequent capitalised words in *titles*.

    titles     -- iterable of title strings.
    stop_words -- capitalised words to ignore.
    count      -- maximum number of words to return.

    Stop words are dropped by comparing whole tokens.  Deleting them as raw
    substrings would mangle real names ("India" -> "dia", "Israel" -> "rael")
    and silently lose them.

    Returns a list of words ordered from most to least frequent; the list is
    empty when no capitalised word is found.
    """
    ignored = frozenset(stop_words)
    nouns = [
        word
        for word in PROPER_NOUN.findall(" ".join(titles))
        if word not in ignored
    ]
    if not nouns:
        return []
    return list(pd.Series(nouns).value_counts().nlargest(count).index)


def filter_submissions(submissions, term, min_score=MIN_SCORE):
    """Return the rows of *submissions* that mention *term* and score enough.

    submissions -- DataFrame with ``titles`` and ``scores`` columns.
    term        -- text to look for; matched case-insensitively anywhere in
                   the title.
    min_score   -- rows need a score of at least this value.
    """
    # Escape the term so it is always matched literally, never as a regex.
    mentions = submissions['titles'].str.contains(
        re.escape(term), case=False, regex=True, na=False
    )
    return submissions[mentions & (submissions['scores'] >= min_score)]


def main():
    """Print, for each top proper noun, the well-scored titles mentioning it."""
    # connect to reddit (replace the placeholders with real credentials)
    reddit = praw.Reddit(client_id='my_id', client_secret='my_secret', user_agent='me')
    # get new submissions from News
    submissions = fetch_submissions(reddit)
    # get most common proper nouns
    search_terms = most_common_proper_nouns(submissions['titles'])
    # show all submissions referencing one of the search terms with a score
    # of at least MIN_SCORE
    for term in search_terms:
        print("\n\n Titles about " + term + ":")
        print(filter_submissions(submissions, term))


if __name__ == "__main__":
    main()
# (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.")