Last active
July 22, 2020 18:41
-
-
Save joshstrupp/6b5769d58e83ead78801b46e4508a6a6 to your computer and use it in GitHub Desktop.
Generating keyword and sentiment insights for select Subreddit(s)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import praw | |
import nltk | |
import random | |
from pprint import pprint | |
# Reddit API client used by the rest of this script. Replace every 'enter_here'
# placeholder (and USERNAME in the user agent) with your own client_id,
# client_secret, username and password, or follow this quick start guide:
# https://github.com/reddit-archive/reddit/wiki/OAuth2-Quick-Start-Example#first-steps
reddit = praw.Reddit(
    client_id='enter_here',
    client_secret="enter_here",
    username='enter_here',
    password='enter_here',
    user_agent='Comment Extraction (by /u/USERNAME)',
)
from textblob import TextBlob | |
import matplotlib.pyplot as plt | |
from wordcloud import WordCloud | |
def _polarity_bucket(polarity):
    """Map a polarity score to an index into ['Negative', 'Neutral', 'Positive'].

    Args:
        polarity: Sentiment polarity score; negative values mean negative
            sentiment, positive values mean positive sentiment.

    Returns:
        0 for a score below zero, 1 for a score of exactly zero and 2 for a
        score above zero.
    """
    if polarity < 0:
        return 0
    if polarity > 0:
        return 2
    return 1


def subreddit_hot(subreddit):
    """Show a word cloud, a sentiment pie chart and the week's top post titles.

    The titles of up to 50 "hot" posts are fetched, saved one per line to
    '<subreddit>_hot.txt' in the current directory (overwriting any previous
    file), rendered as a word cloud, and classified as negative / neutral /
    positive with TextBlob for a pie chart. Finally the titles of up to ten
    top posts of the week are printed.

    Args:
        subreddit: Object exposing ``hot(limit=...)`` and
            ``top(time_filter=..., limit=...)`` whose items have a ``title``
            attribute; this script passes ``reddit.subreddit(name)``. Its
            string form is used in the output file name and in the printed
            headings.
    """
    # Imported here, as in the original script, so STOPWORDS is available
    # without touching the module-level import block.
    from wordcloud import WordCloud, STOPWORDS

    print('Commonly used words displayed as a Word Cloud for the 50 most recent hot posts in: ', subreddit)

    # Fetch the listing once and keep the titles in memory: re-reading them
    # from the file used to drop the first title from the sentiment analysis.
    titles = [post.title for post in subreddit.hot(limit=50)]

    # Still persist the titles so the text file remains available afterwards.
    # An explicit encoding keeps non-ASCII titles from failing on platforms
    # whose default encoding is not UTF-8.
    with open('%s_hot.txt' % subreddit, 'w', encoding='utf-8') as file:
        file.writelines(title + '\n' for title in titles)

    if titles:
        # generate wordcloud
        stopwords = set(STOPWORDS)
        stopwords.update(['It', 'This', 'be'])
        wordcloud = WordCloud(
            stopwords=stopwords,
            height=1000,
            width=3000,
            max_words=40,
            background_color='white',
        ).generate('\n'.join(titles))
        plt.figure()
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.margins(x=0, y=0)
        plt.show()

        # begin sentiment analysis
        labels = ['Negative', 'Neutral', 'Positive']
        values = [0, 0, 0]
        for title in titles:
            # Bucket by the sign of the score. The previous
            # round((p + 1) * 3) % 3 wrapped around, so neutral (0.0) and
            # strongly positive (1.0) titles were both counted as negative.
            values[_polarity_bucket(TextBlob(title).polarity)] += 1

        print('\n\n', 'Sentiment breakdown for 50 most recent hot posts in: ', subreddit, '\n\n')
        colors = ['red', 'gray', 'green']
        plt.pie(values, labels=labels, colors=colors, autopct='%1.1f%%', shadow=True, startangle=140)
        plt.axis('equal')
        plt.show()
    else:
        # Nothing to plot: an empty word cloud or an all-zero pie chart
        # cannot be rendered, so skip both charts instead of crashing.
        print('No hot posts found in: ', subreddit)

    # print top post titles for the week
    print('\n\n', 'This week\'s top posts in ', subreddit)
    # time_filter is passed by keyword, the form PRAW documents for top().
    for submission in subreddit.top(time_filter="week", limit=10):
        print(submission.title)
# Subreddits to analyze, processed in the order listed; add more names here.
for subreddit_name in ('apple', 'microsoft'):
    subreddit_hot(reddit.subreddit(subreddit_name))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment