Last active
September 8, 2017 03:28
-
-
Save OxiBo/17792c38fb57c4c1b64f83179eec17f5 to your computer and use it in GitHub Desktop.
CS50 pset6 Sentiments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
class Analyzer():
    """Implements sentiment analysis.

    Text is scored by tokenizing it and counting tokens found in a list of
    positive words (+1 each) or a list of negative words (-1 each).
    """

    def __init__(self, positives, negatives):
        """Initialize Analyzer.

        Args:
            positives: path to a text file with one positive word per line.
            negatives: path to a text file with one negative word per line.

        In both files, blank lines and lines starting with ";" (comments)
        are ignored.
        """
        # load both word lists into memory as sets for O(1) membership tests
        self.positives = self._load_words(positives)
        self.negatives = self._load_words(negatives)
        # score of the most recently analyzed text (updated by analyze())
        self.total_score = 0

    @staticmethod
    def _load_words(path):
        """Return the set of words listed in the file at path."""
        words = set()
        # the context manager closes the file even if reading fails midway
        with open(path, "r") as word_file:
            for line in word_file:
                # strip all surrounding whitespace, not just the newline, so
                # entries with trailing spaces still match tokens later
                word = line.strip()
                if word and not word.startswith(";"):
                    words.add(word)
        return words

    def analyze(self, text):
        """Analyze text for sentiment, returning its score.

        Args:
            text: the string to analyze.

        Returns:
            The number of positive tokens minus the number of negative
            tokens in text. The same value is stored in self.total_score.
        """
        # instantiate TweetTokenizer and tokenize given text
        tokenizer = nltk.tokenize.TweetTokenizer()
        total = 0
        for token in tokenizer.tokenize(text):
            # word lists are matched case-insensitively; lowercase once
            word = token.lower()
            if word in self.positives:
                total += 1
            elif word in self.negatives:
                total -= 1
        self.total_score = total
        return total
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import html | |
import os | |
import plotly | |
import socket | |
from twython import Twython | |
from twython import TwythonAuthError, TwythonError, TwythonRateLimitError | |
def chart(positive, negative, neutral):
    """Build a pie chart of the three sentiment counts and return it as HTML.

    The figure is rendered with plotly's offline plotter using
    output_type="div".
    """
    # offline plot references:
    # https://plot.ly/python/pie-charts/
    # https://plot.ly/python/reference/#pie

    # one color per slice, in the same order as the labels below
    slice_colors = ["rgb(0,255,00)", "rgb(255,0,0)", "rgb(255,255,0)"]

    pie_trace = {
        "labels": ["positive", "negative", "neutral"],
        "hoverinfo": "none",
        "marker": {"colors": slice_colors},
        "type": "pie",
        "values": [positive, negative, neutral],
    }
    page_layout = {"showlegend": True}
    figure = {"data": [pie_trace], "layout": page_layout}

    return plotly.offline.plot(
        figure,
        output_type="div",
        show_link=False,
        link_text=False,
    )
def get_user_timeline(screen_name, count=200):
    """Fetch the most recent tweets posted by screen_name.

    Args:
        screen_name: the account's screen name; a leading "@" is stripped
            for the user lookup.
        count: how many tweets to request (1 through 200).

    Returns:
        A list of tweet texts with newlines replaced by spaces and HTML
        entities unescaped, or None if the account is protected or the
        request fails with a TwythonError.

    Raises:
        RuntimeError: if count is out of range, if API_KEY or API_SECRET is
            missing from the environment, if the credentials are rejected,
            or if a rate limit is hit.
    """
    # reject counts outside the supported window
    if count > 200 or count < 1:
        raise RuntimeError("invalid count")

    # credentials come from the environment; both are required
    api_key = os.environ.get("API_KEY")
    if not api_key:
        raise RuntimeError("API_KEY not set")
    api_secret = os.environ.get("API_SECRET")
    if not api_secret:
        raise RuntimeError("API_SECRET not set")

    # get screen_name's (or @screen_name's) most recent tweets
    # https://dev.twitter.com/rest/reference/get/users/lookup
    # https://dev.twitter.com/rest/reference/get/statuses/user_timeline
    # https://github.com/ryanmcgrath/twython/blob/master/twython/endpoints.py
    try:
        twitter = Twython(api_key, api_secret)
        bare_name = screen_name.lstrip("@")
        profiles = twitter.lookup_user(screen_name=bare_name)
        if profiles[0]["protected"]:
            return None
        timeline = twitter.get_user_timeline(screen_name=screen_name, count=count)
        texts = []
        for tweet in timeline:
            single_line = tweet["text"].replace("\n", " ")
            texts.append(html.unescape(single_line))
        return texts
    except TwythonAuthError:
        raise RuntimeError("invalid API_KEY and/or API_SECRET") from None
    except TwythonRateLimitError:
        raise RuntimeError("you've hit a rate limit") from None
    except TwythonError:
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# a program that categorizes a user’s tweets as positive or negative. | |
import os | |
import sys | |
import nltk | |
from analyzer import Analyzer | |
from termcolor import colored | |
from helpers import get_user_timeline | |
def main():
    """Print a user's recent tweets, each colored by its sentiment score.

    Expects exactly one command-line argument, the screen name. Each tweet
    is printed with its score: green when the score is positive, red when
    negative, yellow when zero.

    Exits with an error message when usage is wrong, when fetching tweets
    raises, or when no tweets are returned.
    """
    # ensure proper usage
    if len(sys.argv) != 2:
        sys.exit("Usage: ./tweets @screen_name")

    # absolute paths to lists of positive and negative words
    # Dr. Minqing Hu and Prof. Bing Liu of the University of Illinois at Chicago
    # kindly put together lists of 2006 positive words and 4783 negative words
    # https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # number of tweets to request
    count = 50

    # screen name provided by the user
    screen_name = sys.argv[1]

    # get the user's most recent tweets; only the fetch itself is inside the
    # try so that the exit below is not intercepted by an except clause
    try:
        tweets = get_user_timeline(screen_name, count)
    except RuntimeError as e:
        # stop here: continuing would use `tweets` before it is assigned
        sys.exit(str(e))
    except Exception:
        # any other failure while fetching is reported as "no tweets";
        # Exception (not a bare except) lets Ctrl-C propagate normally
        sys.exit("No tweets for {}".format(screen_name))

    # a falsy result (None or an empty list) means there is nothing to analyze
    if not tweets:
        sys.exit("No tweets for {}".format(screen_name))

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # score and print each tweet
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            print(colored(" {} {}".format(score, tweet), "green"))
        elif score < 0.0:
            # no leading space: the minus sign occupies that column
            print(colored("{} {}".format(score, tweet), "red"))
        else:
            print(colored(" {} {}".format(score, tweet), "yellow"))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment