OxiBo/analyzer.py

## analyzer.py
import nltk

class Analyzer():
    """Implements sentiment analysis.

    Text is scored by counting its tokens against two word lists: every
    token found in the positive list adds 1, every token found in the
    negative list subtracts 1, and all other tokens are ignored.
    """

    def __init__(self, positives, negatives):
        """Initialize Analyzer.

        Args:
            positives: Path to a text file listing one positive word per line.
            negatives: Path to a text file listing one negative word per line.

        Raises:
            OSError: If either file cannot be opened or read.
        """
        # load lists of positive and negative words in memory
        self.positives = self._load_words(positives)
        self.negatives = self._load_words(negatives)

    @staticmethod
    def _load_words(path):
        """Return the set of words listed in the file at path."""
        words = set()
        # `with` closes the file even when reading raises part-way through
        with open(path, "r") as lexicon:
            for line in lexicon:
                # strip all surrounding whitespace, not only the newline, so a
                # stray trailing space or tab cannot make a word unmatchable
                word = line.strip()
                # skip blank lines and ";" comment lines
                if word and not word.startswith(";"):
                    words.add(word)
        return words

    def analyze(self, text):
        """Analyze text for sentiment, returning its score.

        Args:
            text: The string to tokenize and score.

        Returns:
            The sum over all tokens of +1 (token in the positive list),
            -1 (token in the negative list) or 0 (neither). The same value
            is also stored in ``self.total_score``.
        """
        # instantiate TweetTokenizer and tokenize given text
        tokenizer = nltk.tokenize.TweetTokenizer()
        tokens = tokenizer.tokenize(text)

        # compare each lower-cased token against the word lists
        total = 0
        for token in tokens:
            word = token.lower()
            if word in self.positives:
                total += 1
            elif word in self.negatives:
                total -= 1

        # the attribute is kept because earlier versions exposed it
        self.total_score = total
        return total


## helpers.py
import html
import os
import plotly
import socket

from twython import Twython
from twython import TwythonAuthError, TwythonError, TwythonRateLimitError

def chart(positive, negative, neutral):
    """Build a pie chart of the three sentiment counts and return plotly's "div" output.

    Args:
        positive: Value for the "positive" slice.
        negative: Value for the "negative" slice.
        neutral: Value for the "neutral" slice.

    Returns:
        Whatever ``plotly.offline.plot`` returns for ``output_type="div"``.
    """

    # offline plot
    # https://plot.ly/python/pie-charts/
    # https://plot.ly/python/reference/#pie
    slice_colors = ["rgb(0,255,00)", "rgb(255,0,0)", "rgb(255,255,0)"]

    # single pie trace; labels, colors and values line up by position
    pie_trace = {
        "labels": ["positive", "negative", "neutral"],
        "hoverinfo": "none",
        "marker": {"colors": slice_colors},
        "type": "pie",
        "values": [positive, negative, neutral],
    }

    figure = {"data": [pie_trace], "layout": {"showlegend": True}}
    return plotly.offline.plot(figure, output_type="div", show_link=False, link_text=False)

def get_user_timeline(screen_name, count=200):
    """Fetch the text of screen_name's most recent tweets.

    Args:
        screen_name: Twitter handle, with or without a leading "@".
        count: Number of tweets to request; must be between 1 and 200.

    Returns:
        A list of tweet texts with newlines replaced by spaces and HTML
        entities unescaped, or None when the account is protected or the
        request fails with a TwythonError.

    Raises:
        RuntimeError: If count is out of range, API_KEY or API_SECRET is
            not set, the credentials are rejected, or a rate limit is hit.
    """

    # reject counts outside the accepted range
    if count > 200 or count < 1:
        raise RuntimeError("invalid count")

    # both credentials must be present in the environment
    api_key = os.environ.get("API_KEY")
    if not api_key:
        raise RuntimeError("API_KEY not set")
    api_secret = os.environ.get("API_SECRET")
    if not api_secret:
        raise RuntimeError("API_SECRET not set")

    # https://dev.twitter.com/rest/reference/get/users/lookup
    # https://dev.twitter.com/rest/reference/get/statuses/user_timeline
    # https://github.com/ryanmcgrath/twython/blob/master/twython/endpoints.py
    try:
        client = Twython(api_key, api_secret)

        # look the account up first so protected users can be skipped
        profiles = client.lookup_user(screen_name=screen_name.lstrip("@"))
        if profiles[0]["protected"]:
            return None

        statuses = client.get_user_timeline(screen_name=screen_name, count=count)
        texts = []
        for status in statuses:
            single_line = status["text"].replace("\n", " ")
            texts.append(html.unescape(single_line))
        return texts
    except TwythonAuthError:
        raise RuntimeError("invalid API_KEY and/or API_SECRET") from None
    except TwythonRateLimitError:
        raise RuntimeError("you've hit a rate limit") from None
    except TwythonError:
        return None

## tweets.py
#!/usr/bin/env python3
# a program that categorizes a user’s tweets as positive or negative.


import os
import sys
import nltk


from analyzer import Analyzer
from termcolor import colored
from helpers import get_user_timeline

def main():
    """Print a user's recent tweets, each prefixed by its score and colored by sentiment.

    Reads the screen name from the single command-line argument, fetches up
    to 50 tweets, scores each with Analyzer and prints it in green (score
    above zero), red (below zero) or yellow (zero).

    Exits with a message when the arguments are wrong, when fetching fails,
    or when no tweets are available for the screen name.
    """

    # ensure proper usage
    if len(sys.argv) != 2:
        sys.exit("Usage: ./tweets @screen_name")

    # absolute paths to lists of positive and negative words
    # Dr. Minqing Hu and Prof. Bing Liu of the University of Illinois at Chicago
    # kindly put together lists of 2006 positive words and 4783 negative words
    # https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # declare amount of tweets to get
    count = 50

    # declare screen_name provided by a user
    screen_name = sys.argv[1]

    # get user's most recent tweets
    try:
        tweets = get_user_timeline(screen_name, count)
    except RuntimeError as e:
        # stop here: continuing would reach the loop below with `tweets` unbound
        sys.exit(str(e))
    except Exception:
        # any other failure is reported as before, but SystemExit and
        # KeyboardInterrupt are no longer swallowed by a bare except
        sys.exit("No tweets for {}".format(screen_name))

    # no tweets retrieved (get_user_timeline returned None or an empty list)
    if not tweets:
        sys.exit("No tweets for {}".format(screen_name))

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # iterate over each tweet in given list of tweets
    for tweet in tweets:

        # analyze tweet
        score = analyzer.analyze(tweet)

        # the leading space is omitted for negative scores, which already
        # print a "-" sign in that position
        if score > 0.0:
            print(colored(" {} {}".format(score, tweet), "green"))
        elif score < 0.0:
            print(colored("{} {}".format(score, tweet), "red"))
        else:
            print(colored(" {} {}".format(score, tweet), "yellow"))


if __name__ == "__main__":
    main()
	import nltk

	class Analyzer():
	    """Implements sentiment analysis.

	    Every token found in the positive word list adds 1 to a text's score,
	    every token found in the negative word list subtracts 1, and all other
	    tokens are ignored.
	    """

	    def __init__(self, positives, negatives):
	        """Initialize Analyzer.

	        Args:
	            positives: Path to a text file listing one positive word per line.
	            negatives: Path to a text file listing one negative word per line.

	        Raises:
	            OSError: If either file cannot be opened or read.
	        """
	        # load lists of positive and negative words in memory
	        self.positives = self._load_words(positives)
	        self.negatives = self._load_words(negatives)

	    @staticmethod
	    def _load_words(path):
	        """Return the set of words listed in the file at path."""
	        words = set()
	        # `with` closes the file even when reading raises part-way through
	        with open(path, "r") as lexicon:
	            for line in lexicon:
	                # strip all surrounding whitespace, not only the newline
	                word = line.strip()
	                # skip blank lines and ";" comment lines
	                if word and not word.startswith(";"):
	                    words.add(word)
	        return words

	    def analyze(self, text):
	        """Analyze text for sentiment, returning its score.

	        Args:
	            text: The string to tokenize and score.

	        Returns:
	            The sum over all tokens of +1 (positive word), -1 (negative
	            word) or 0 (neither); also stored in ``self.total_score``.
	        """
	        # instantiate TweetTokenizer and tokenize given text
	        tokenizer = nltk.tokenize.TweetTokenizer()
	        tokens = tokenizer.tokenize(text)

	        # compare each lower-cased token against the word lists
	        total = 0
	        for token in tokens:
	            word = token.lower()
	            if word in self.positives:
	                total += 1
	            elif word in self.negatives:
	                total -= 1

	        # the attribute is kept because earlier versions exposed it
	        self.total_score = total
	        return total
	import html
	import os
	import plotly
	import socket

	from twython import Twython
	from twython import TwythonAuthError, TwythonError, TwythonRateLimitError

	def chart(positive, negative, neutral):
	    """Return a pie chart for specified sentiments as HTML.

	    Args:
	        positive: Value for the "positive" slice.
	        negative: Value for the "negative" slice.
	        neutral: Value for the "neutral" slice.

	    Returns:
	        Whatever ``plotly.offline.plot`` returns for ``output_type="div"``.
	    """

	    # offline plot
	    # https://plot.ly/python/pie-charts/
	    # https://plot.ly/python/reference/#pie
	    figure = {
	        "data": [
	            {
	                # labels, colors and values line up by position
	                "labels": ["positive", "negative", "neutral"],
	                "hoverinfo": "none",
	                "marker": {
	                    "colors": [
	                        "rgb(0,255,00)",
	                        "rgb(255,0,0)",
	                        "rgb(255,255,0)"
	                    ]
	                },
	                "type": "pie",
	                "values": [positive, negative, neutral]
	            }
	        ],
	        "layout": {
	            "showlegend": True
	        }
	    }
	    return plotly.offline.plot(figure, output_type="div", show_link=False, link_text=False)

	def get_user_timeline(screen_name, count=200):
	    """Return list of most recent tweets posted by screen_name.

	    Args:
	        screen_name: Twitter handle, with or without a leading "@".
	        count: Number of tweets to request; must be between 1 and 200.

	    Returns:
	        A list of tweet texts with newlines replaced by spaces and HTML
	        entities unescaped, or None when the account is protected or the
	        request fails with a TwythonError.

	    Raises:
	        RuntimeError: If count is out of range, API_KEY or API_SECRET is
	            not set, the credentials are rejected, or a rate limit is hit.
	    """

	    # ensure count is valid
	    if count < 1 or count > 200:
	        raise RuntimeError("invalid count")

	    # ensure environment variables are set
	    if not os.environ.get("API_KEY"):
	        raise RuntimeError("API_KEY not set")
	    if not os.environ.get("API_SECRET"):
	        raise RuntimeError("API_SECRET not set")

	    # get screen_name's (or @screen_name's) most recent tweets
	    # https://dev.twitter.com/rest/reference/get/users/lookup
	    # https://dev.twitter.com/rest/reference/get/statuses/user_timeline
	    # https://github.com/ryanmcgrath/twython/blob/master/twython/endpoints.py
	    try:
	        twitter = Twython(os.environ.get("API_KEY"), os.environ.get("API_SECRET"))

	        # look the account up first so protected users can be skipped
	        user = twitter.lookup_user(screen_name=screen_name.lstrip("@"))
	        if user[0]["protected"]:
	            return None
	        tweets = twitter.get_user_timeline(screen_name=screen_name, count=count)
	        return [html.unescape(tweet["text"].replace("\n", " ")) for tweet in tweets]
	    except TwythonAuthError:
	        raise RuntimeError("invalid API_KEY and/or API_SECRET") from None
	    except TwythonRateLimitError:
	        raise RuntimeError("you've hit a rate limit") from None
	    except TwythonError:
	        return None
	#!/usr/bin/env python3
	# a program that categorizes a user’s tweets as positive or negative.


	import os
	import sys
	import nltk


	from analyzer import Analyzer
	from termcolor import colored
	from helpers import get_user_timeline

	def main():
	    """Print a user's recent tweets, each prefixed by its score and colored by sentiment.

	    Reads the screen name from the single command-line argument, fetches up
	    to 50 tweets, scores each with Analyzer and prints it in green (score
	    above zero), red (below zero) or yellow (zero).

	    Exits with a message when the arguments are wrong, when fetching fails,
	    or when no tweets are available for the screen name.
	    """

	    # ensure proper usage
	    if len(sys.argv) != 2:
	        sys.exit("Usage: ./tweets @screen_name")

	    # absolute paths to lists of positive and negative words
	    # Dr. Minqing Hu and Prof. Bing Liu of the University of Illinois at Chicago
	    # kindly put together lists of 2006 positive words and 4783 negative words
	    # https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon
	    positives = os.path.join(sys.path[0], "positive-words.txt")
	    negatives = os.path.join(sys.path[0], "negative-words.txt")

	    # declare amount of tweets to get
	    count = 50

	    # declare screen_name provided by a user
	    screen_name = sys.argv[1]

	    # get user's most recent tweets
	    try:
	        tweets = get_user_timeline(screen_name, count)
	    except RuntimeError as e:
	        # stop here: continuing would reach the loop below with `tweets` unbound
	        sys.exit(str(e))
	    except Exception:
	        # any other failure is reported as before, but SystemExit and
	        # KeyboardInterrupt are no longer swallowed by a bare except
	        sys.exit("No tweets for {}".format(screen_name))

	    # no tweets retrieved (get_user_timeline returned None or an empty list)
	    if not tweets:
	        sys.exit("No tweets for {}".format(screen_name))

	    # instantiate analyzer
	    analyzer = Analyzer(positives, negatives)

	    # iterate over each tweet in given list of tweets
	    for tweet in tweets:

	        # analyze tweet
	        score = analyzer.analyze(tweet)

	        # the leading space is omitted for negative scores, which already
	        # print a "-" sign in that position
	        if score > 0.0:
	            print(colored(" {} {}".format(score, tweet), "green"))
	        elif score < 0.0:
	            print(colored("{} {}".format(score, tweet), "red"))
	        else:
	            print(colored(" {} {}".format(score, tweet), "yellow"))


	if __name__ == "__main__":
	    main()