Skip to content

Instantly share code, notes, and snippets.

@OxiBo
Last active September 8, 2017 03:28
Show Gist options
  • Save OxiBo/17792c38fb57c4c1b64f83179eec17f5 to your computer and use it in GitHub Desktop.
Save OxiBo/17792c38fb57c4c1b64f83179eec17f5 to your computer and use it in GitHub Desktop.
CS50 pset6 Sentiments
import nltk
class Analyzer:
    """Implements sentiment analysis.

    Text is scored by counting tokens: each token found in the positive word
    list adds 1, each token found in the negative word list subtracts 1, and
    every other token contributes nothing.
    """

    def __init__(self, positives, negatives):
        """Initialize Analyzer.

        Args:
            positives: Path to a text file with one positive word per line.
            negatives: Path to a text file with one negative word per line.

        In both files, blank lines and lines starting with ";" are ignored.
        """
        # load both word lists in memory as sets for O(1) membership tests
        self.positives = self._load_words(positives)
        self.negatives = self._load_words(negatives)

    @staticmethod
    def _load_words(path):
        """Return the set of words listed, one per line, in the file at path."""
        words = set()
        # `with` closes the file even if reading raises part-way through
        with open(path, "r") as lexicon:
            for line in lexicon:
                # strip all surrounding whitespace (not just "\n") so a stray
                # trailing space cannot produce an entry that never matches
                word = line.strip()
                if word and not word.startswith(";"):
                    words.add(word)
        return words

    def analyze(self, text):
        """Analyze text for sentiment, returning its score.

        Args:
            text: The string to score; it is split into tokens with nltk's
                TweetTokenizer and each token is compared case-insensitively
                (lowercased) against the word lists.

        Returns:
            The integer sum of +1 per positive token and -1 per negative token.
            The same value is also stored in ``self.total_score``.
        """
        tokenizer = nltk.tokenize.TweetTokenizer()

        score = 0
        for token in tokenizer.tokenize(text):
            # lowercase once per token instead of once per lookup
            word = token.lower()
            if word in self.positives:
                score += 1
            elif word in self.negatives:
                score -= 1

        # earlier versions exposed the running total as an attribute; keep it
        self.total_score = score
        return score
import html
import os
import plotly
import socket
from twython import Twython
from twython import TwythonAuthError, TwythonError, TwythonRateLimitError
def chart(positive, negative, neutral):
    """Render the three sentiment counts as a pie chart and return it as HTML.

    The figure is passed to plotly's offline plotter with ``output_type="div"``.
    """
    # plotly references:
    # https://plot.ly/python/pie-charts/
    # https://plot.ly/python/reference/#pie

    # one color per slice, in the same order as the labels below
    slice_colors = [
        "rgb(0,255,00)",
        "rgb(255,0,0)",
        "rgb(255,255,0)",
    ]

    pie_trace = {
        "labels": ["positive", "negative", "neutral"],
        "hoverinfo": "none",
        "marker": {"colors": slice_colors},
        "type": "pie",
        "values": [positive, negative, neutral],
    }
    page_layout = {"showlegend": True}

    return plotly.offline.plot(
        {"data": [pie_trace], "layout": page_layout},
        output_type="div",
        show_link=False,
        link_text=False,
    )
def get_user_timeline(screen_name, count=200):
    """Return list of most recent tweets posted by screen_name.

    Args:
        screen_name: Twitter handle, with or without a leading "@".
        count: Number of tweets to request; must be between 1 and 200.

    Returns:
        A list of tweet texts with newlines replaced by spaces and HTML
        entities unescaped, or None when the account is protected, the user
        lookup comes back empty, or the request fails with a TwythonError.

    Raises:
        RuntimeError: If count is out of range, API_KEY or API_SECRET is not
            set in the environment, the credentials are rejected, or a rate
            limit has been hit.
    """
    # ensure count is valid
    if count < 1 or count > 200:
        raise RuntimeError("invalid count")

    # ensure environment variables are set (read each one only once)
    api_key = os.environ.get("API_KEY")
    if not api_key:
        raise RuntimeError("API_KEY not set")
    api_secret = os.environ.get("API_SECRET")
    if not api_secret:
        raise RuntimeError("API_SECRET not set")

    # strip the optional "@" once so both requests below use the same handle
    handle = screen_name.lstrip("@")

    # get screen_name's (or @screen_name's) most recent tweets
    # https://dev.twitter.com/rest/reference/get/users/lookup
    # https://dev.twitter.com/rest/reference/get/statuses/user_timeline
    # https://github.com/ryanmcgrath/twython/blob/master/twython/endpoints.py
    try:
        twitter = Twython(api_key, api_secret)
        user = twitter.lookup_user(screen_name=handle)
        # an empty lookup result is treated like any other "no tweets" case
        # instead of letting user[0] raise an uncaught IndexError
        if not user or user[0]["protected"]:
            return None
        tweets = twitter.get_user_timeline(screen_name=handle, count=count)
        return [html.unescape(tweet["text"].replace("\n", " ")) for tweet in tweets]
    except TwythonAuthError:
        raise RuntimeError("invalid API_KEY and/or API_SECRET") from None
    except TwythonRateLimitError:
        raise RuntimeError("you've hit a rate limit") from None
    except TwythonError:
        return None
#!/usr/bin/env python3
# a program that categorizes a user’s tweets as positive or negative.
import os
import sys
import nltk
from analyzer import Analyzer
from termcolor import colored
from helpers import get_user_timeline
def main():
    """Print a user's recent tweets, each colored by its sentiment score.

    Reads the screen name from ``sys.argv[1]``, fetches up to 50 tweets with
    ``get_user_timeline``, scores each one with ``Analyzer`` and prints it in
    green (score > 0), red (score < 0) or yellow (score == 0).

    Exits with a message when the usage is wrong, when fetching raises a
    RuntimeError, or when no tweets could be retrieved.
    """
    # ensure proper usage
    if len(sys.argv) != 2:
        sys.exit("Usage: ./tweets @screen_name")

    # absolute paths to lists of positive and negative words
    # Dr. Minqing Hu and Prof. Bing Liu of the University of Illinois at Chicago
    # kindly put together lists of 2006 positive words and 4783 negative words
    # https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # number of tweets to get
    count = 50

    # screen name provided by the user
    screen_name = sys.argv[1]

    # get the user's most recent tweets; a RuntimeError carries a message for
    # the user, so exit with it instead of continuing with `tweets` unbound
    try:
        tweets = get_user_timeline(screen_name, count)
    except RuntimeError as error:
        sys.exit(str(error))

    # None or an empty list means the screen name does not exist, is private,
    # or has nothing to show
    if not tweets:
        sys.exit("No tweets for {}".format(screen_name))

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # score and print each tweet
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            print(colored(" {} {}".format(score, tweet), "green"))
        elif score < 0.0:
            # no leading space: the minus sign keeps the columns aligned
            print(colored("{} {}".format(score, tweet), "red"))
        else:
            print(colored(" {} {}".format(score, tweet), "yellow"))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment