Created
May 9, 2018 20:42
-
-
Save beta-decay/6b23010d6c14fa3a9d7999179ffb1983 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Render a mask-shaped word cloud from a Twitter user's timeline.

The script authenticates with tweepy, gathers the lower-cased text of every
status the timeline cursor yields for ``user``, strips URL-like tokens with a
regular expression, and draws the remaining words with ``wordcloud`` inside
the mask loaded from ``us_mask.png``.  The figure is written to
``trump_world_cloud.png`` and then shown on screen.
"""

from os import path
import random
import re

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
# NOTE(review): imread is not used below, and scipy.misc.imread appears to
# have been removed from newer SciPy releases -- confirm and drop if so.
from scipy.misc import imread
import tweepy
from wordcloud import WordCloud, STOPWORDS

# Consumer keys and access tokens, used for OAuth.
# Fill these in before running; empty credentials will not authenticate.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

# OAuth process, using the keys and tokens
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

user = "realDonaldTrump"

# Creation of the actual interface, using authentication.
# wait_on_rate_limit=True is passed so tweepy handles rate limiting itself.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Collect the whole timeline as one lower-cased string.  Each tweet is
# prefixed with a single space (same result as the former ``text += " " + tweet``
# loop) but joined in one pass instead of repeated string concatenation.
# NOTE(review): status.text may be truncated for long tweets unless the
# request uses tweet_mode="extended" and reads full_text -- confirm against
# the tweepy documentation before switching.
timeline = tweepy.Cursor(api.user_timeline, screen_name='@' + user).items()
text = "".join(" " + status.text.lower() for status in timeline)

# Drop URL-like tokens so link fragments do not end up in the cloud.
text_nourl = re.sub(r'(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w\.-]*)*\/?\S', '', text)

# Start from wordcloud's built-in stop words and add Twitter-specific noise.
# NOTE(review): the multi-word entries are kept as in the original; whether
# wordcloud matches stop words spanning two tokens should be verified.
stopwords = set(STOPWORDS)
stopwords.update((
    "amp",
    "rt",
    "rt realdonaldtrump",
    "rt foxandfriends",
    "rt erictrump",
))

# The mask image is loaded as an array and handed to WordCloud as ``mask``.
usa_mask = np.array(Image.open(r"us_mask.png"))

wordcloud = WordCloud(
    background_color="white",
    mask=usa_mask,
    max_words=2000,
    width=3000,
    height=2000,
    stopwords=stopwords,
).generate(text_nourl)

plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
# Save before show(): the figure is written to disk first, then displayed.
plt.savefig('trump_world_cloud.png', dpi=1000)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment