Created
July 24, 2020 04:38
-
-
Save usametov/f60bb3d16fd28b590aa1e79d06a8858f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install python-twitter
# pip install nltk
# python -m nltk.downloader vader_lexicon
# pip install matplotlib
# pip install seaborn
import twitter | |
import os | |
import datetime | |
import nltk | |
import re | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from nltk.sentiment.vader import SentimentIntensityAnalyzer | |
# Twitter API client. Each OAuth credential is read from the environment
# variable of the same name; a variable that is not set is passed as None.
# tweet_mode="extended" is what makes `full_text` available on the results.
api = twitter.Api(
    tweet_mode="extended",
    **{
        credential: os.environ.get(credential)
        for credential in (
            "consumer_key",
            "consumer_secret",
            "access_token_key",
            "access_token_secret",
        )
    },
)
# Every keyword must appear in a tweet for it to be returned by the search.
keywords = ["crude", "oil"]

# The search must return strictly more than this many tweets to be accepted.
MIN_TWEETS = 30
# How far back (in days) the search window may be widened. The error below
# speaks of "the last week", so the window never starts earlier than this.
MAX_LOOKBACK_DAYS = 7

# Start with tweets from today only and move the `since:` date back one day
# at a time until enough tweets are found. `date_for_query` keeps the start
# date of the accepted window; it is used later for the chart title.
newest_day = datetime.date.today()
for days_back in range(MAX_LOOKBACK_DAYS + 1):
    date_for_query = newest_day - datetime.timedelta(days=days_back)
    date_formatted = date_for_query.strftime("%Y-%m-%d")
    # URL-encoded form of:
    #   <keywords> since:<date> -filter:links filter:replies
    # i.e. replies without links, at most 100 results, English only.
    query = (
        f"q={'%20'.join(keywords)}"
        f"%20since%3A{date_formatted}"
        "%20-filter%3Alinks%20filter%3Areplies&count=100&lang=en"
    )
    search_results = api.GetSearch(raw_query=query)
    if len(search_results) > MIN_TWEETS:
        break
else:
    # The widest allowed window still did not yield enough tweets.
    raise ValueError(
        "In the last week there has not been enough tweets to produce "
        "valuable sentiment analysis for following keywords: "
        f"{', '.join(keywords)}. Please, try again with different keywords "
        "or a lesser amount of keywords, since all of the specified "
        "keywords must appear in the tweet for it to be considered for "
        "analysis."
    )
# Collect the text of one tweet per author, with @mentions and #hashtags
# removed, keeping only texts that are long enough to be worth scoring.
MENTION_OR_HASHTAG = re.compile(r"[@#]\w+")  # raw string: `\w` is a regex class
MIN_TEXT_LENGTH = 20  # texts must be strictly longer than this

tweets_text = []
tweets_authors = []
seen_author_ids = set()
for tweet in search_results:
    # CAVEAT: consider only a single tweet per author, so that the total
    # sentiment reflects the opinion of many users rather than one prolific
    # user. Authors are compared by id, which gives O(1) lookups and does
    # not depend on how `twitter.User` implements equality.
    author_id = tweet.user.id
    if author_id in seen_author_ids:
        continue
    seen_author_ids.add(author_id)
    tweets_authors.append(tweet.user)

    # CAVEAT: mentions and hashtags are dropped - there is no reason to
    # subject them to sentiment analysis.
    tweet_text = MENTION_OR_HASHTAG.sub("", tweet.full_text)
    if len(tweet_text) > MIN_TEXT_LENGTH:
        tweets_text.append(tweet_text.lstrip())
def vader_sentiment_score(tweet_text: str) -> float:
    """Return the VADER compound sentiment score of ``tweet_text``.

    The score is the ``"compound"`` entry of
    ``SentimentIntensityAnalyzer.polarity_scores``, rounded to three
    decimal places.

    The analyzer is created on the first call and cached on the function
    object, so it is not rebuilt for every tweet that gets scored.
    """
    try:
        analyzer = vader_sentiment_score._analyzer
    except AttributeError:
        analyzer = SentimentIntensityAnalyzer()
        vader_sentiment_score._analyzer = analyzer
    return round(analyzer.polarity_scores(tweet_text)["compound"], 3)
# Score every collected tweet text, then map each compound score to a label:
#   below -0.05         -> "negative"
#   -0.05 .. 0.05 incl. -> "neutral"
#   above 0.05          -> "positive"
sentiment_score_vader = [vader_sentiment_score(text) for text in tweets_text]

sentiment_list = []
for compound in sentiment_score_vader:
    if compound > 0.05:
        sentiment_list.append("positive")
    elif compound < -0.05:
        sentiment_list.append("negative")
    elif abs(compound) <= 0.05:
        sentiment_list.append("neutral")
# Draw a bar chart with the number of tweets per sentiment label and save it
# as a PNG file in the current working directory.
x_axis = ["negative", "neutral", "positive"]
y_axis = [sentiment_list.count(label) for label in x_axis]

colors = ["firebrick", "dodgerblue", "limegreen"]
sns.set_palette(sns.color_palette(colors))
sns.set_style("darkgrid")

fig, ax = plt.subplots(figsize=(4, 4), dpi=300)
ax.set_ylabel("Amount", weight="bold", fontsize="large")
# Draw on `ax` explicitly instead of relying on the "current axes" state.
sns.barplot(x=x_axis, y=y_axis, ax=ax)

today = datetime.date.today()
keywords_label = ", ".join(keywords).replace("[", "").replace("]", "")
if date_for_query != today:
    # The search window had to be widened beyond today.
    period = (
        f"dated for period: {date_for_query.strftime('%d.%m')}"
        f" - {today.strftime('%d.%m')}"
    )
else:
    period = f"published today ({date_for_query.strftime('%d.%m')})"
fig.suptitle(
    f"Sentiment of Tweets containing keywords: {keywords_label}\n{period}",
    weight="bold",
)

# Write each bar's count just above it, nudged left so that the text looks
# centred (two-digit counts need a larger nudge than one-digit counts).
y_offset = 0.3
for bar in ax.patches:
    bbox = bar.get_bbox()
    val = f"{int(bar.get_height())}"
    x_offset = -0.04 if int(val) < 10 else -0.08
    ax.annotate(
        val,
        ((bbox.x0 + bbox.x1) / 2 + x_offset, bbox.y1 + y_offset),
        weight="bold",
    )

# Leave headroom above the tallest bar so its label is not clipped.
tallest = max(y_axis)
ylim = tallest + 0.1 * tallest
# Read the lower limit from the existing axes: calling plt.axes() here would
# add a second, empty Axes to the figure.
ax.set_ylim(ax.get_ylim()[0], ylim + 0.5)

file_keywords = "_".join(keywords).replace("[", "").replace("]", "")
fig.savefig(
    f"tweets_sentiment_{file_keywords}_{today.strftime('%d_%m')}.png",
    bbox_inches="tight",
    pad_inches=0.1,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment