OxiBo/analyzer.py

## analyzer.py
import nltk

class Analyzer():
    """Implements sentiment analysis.

    Tokens are scored against two word lists loaded at construction
    time: ``positives`` and ``negatives``, each a set of lower-cased words.
    """

    def __init__(self, positives, negatives):
        """Initialize Analyzer.

        Args:
            positives: path to a text file of positive words, one per line.
            negatives: path to a text file of negative words, one per line.

        Blank lines and lines starting with ";" are ignored in both files.
        """
        self.positives = self._load_words(positives)
        self.negatives = self._load_words(negatives)

        # score of the most recently analyzed text
        self.total_score = 0

        # tokenizer is created on first use and then reused across calls
        self._tokenizer = None

    @staticmethod
    def _load_words(path):
        """Return the set of words listed in the lexicon file at path."""
        words = set()
        # "with" closes the file even if reading raises part-way through
        with open(path, "r") as lexicon:
            for line in lexicon:
                # strip all surrounding whitespace, not just the newline,
                # so padded entries still match tokens
                word = line.strip()
                if word and not word.startswith(";"):
                    # analyze() lower-cases tokens before lookup
                    words.add(word.lower())
        return words

    def analyze(self, text):
        """Analyze text for sentiment, returning its score.

        Every token found in the positive list adds 1, every token found
        in the negative list subtracts 1, and any other token adds 0.
        The result is also stored in ``self.total_score``.
        """
        if self._tokenizer is None:
            self._tokenizer = nltk.tokenize.TweetTokenizer()

        total = 0
        for token in self._tokenizer.tokenize(text):
            word = token.lower()
            if word in self.positives:
                total += 1
            elif word in self.negatives:
                total -= 1

        self.total_score = total
        return total


## application.py
from flask import Flask, redirect, render_template, request, url_for

import os
import sys
import helpers
from analyzer import Analyzer

# Flask application object; the @app.route views in this module register on it
app = Flask(__name__)

@app.route("/")
def index():
    """Serve the landing page by rendering index.html."""
    page = render_template("index.html")
    return page

@app.route("/search")
def search():
    """Chart the sentiment of a user's recent tweets.

    Reads the ``screen_name`` query parameter, requests up to 100 of that
    user's tweets, scores each one with Analyzer and renders search.html
    with a pie chart of the positive / negative / neutral counts.
    Redirects to the index page when ``screen_name`` is missing or when
    no tweets could be retrieved.
    """

    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # number of tweets to request
    count = 100

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, count)

    # redirect to index if tweets have not been retrieved successfully
    # (None is returned for unknown or protected accounts) or if the
    # timeline is empty; "not tweets" covers both cases
    if not tweets:
        return redirect(url_for("index"))

    # absolute paths to lists of positive and negative words
    # Dr. Minqing Hu and Prof. Bing Liu of the University of Illinois at Chicago
    # kindly put together lists of 2006 positive words and 4783 negative words
    # https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon
    # Resolved relative to this module rather than sys.path[0], which is
    # process-wide state (assumes the word lists sit next to this file).
    base_dir = os.path.dirname(os.path.abspath(__file__))
    positives = os.path.join(base_dir, "positive-words.txt")
    negatives = os.path.join(base_dir, "negative-words.txt")

    positive, negative, neutral = 0.0, 0.0, 0.0

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # classify each tweet by the sign of its score
    for tweet in tweets:
        score = analyzer.analyze(tweet)

        if score > 0:
            positive += 1
        elif score < 0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)

## helpers.py
import html
import os
import plotly
import socket

from twython import Twython
from twython import TwythonAuthError, TwythonError, TwythonRateLimitError

def chart(positive, negative, neutral):
    """Build a pie chart of the three sentiment counts and return it as HTML.

    The figure is rendered offline by plotly with output_type="div".
    """

    # https://plot.ly/python/pie-charts/
    # https://plot.ly/python/reference/#pie
    slice_colors = ["rgb(0,255,00)", "rgb(255,0,0)", "rgb(255,255,0)"]

    # one pie trace; values are listed in the same order as labels
    pie_trace = {
        "labels": ["positive", "negative", "neutral"],
        "hoverinfo": "none",
        "marker": {"colors": slice_colors},
        "type": "pie",
        "values": [positive, negative, neutral],
    }

    figure = {"data": [pie_trace], "layout": {"showlegend": True}}

    return plotly.offline.plot(
        figure,
        output_type="div",
        show_link=False,
        link_text=False,
    )

def get_user_timeline(screen_name, count=200):
    """Return list of most recent tweets posted by screen_name.

    Args:
        screen_name: Twitter handle, with or without a leading "@".
        count: number of tweets to request; must be between 1 and 200.

    Returns:
        A list of tweet texts with newlines replaced by spaces and HTML
        entities unescaped, or None if the account is protected, the
        lookup returns no user, or Twython reports any other error.

    Raises:
        RuntimeError: if count is out of range, API_KEY or API_SECRET is
            not set or is rejected, or the rate limit has been hit.
    """

    # ensure count is valid
    if count < 1 or count > 200:
        raise RuntimeError("invalid count")

    # ensure environment variables are set
    api_key = os.environ.get("API_KEY")
    if not api_key:
        raise RuntimeError("API_KEY not set")
    api_secret = os.environ.get("API_SECRET")
    if not api_secret:
        raise RuntimeError("API_SECRET not set")

    # use the same "@"-less handle for both API calls
    handle = screen_name.lstrip("@")

    # get screen_name's (or @screen_name's) most recent tweets
    # https://dev.twitter.com/rest/reference/get/users/lookup
    # https://dev.twitter.com/rest/reference/get/statuses/user_timeline
    # https://github.com/ryanmcgrath/twython/blob/master/twython/endpoints.py
    try:
        twitter = Twython(api_key, api_secret)
        user = twitter.lookup_user(screen_name=handle)
        # an empty lookup result is treated like a protected account
        if not user or user[0]["protected"]:
            return None
        tweets = twitter.get_user_timeline(screen_name=handle, count=count)
        return [html.unescape(tweet["text"].replace("\n", " ")) for tweet in tweets]
    except TwythonAuthError:
        raise RuntimeError("invalid API_KEY and/or API_SECRET") from None
    except TwythonRateLimitError:
        raise RuntimeError("you've hit a rate limit") from None
    except TwythonError:
        return None

## index.html
{% extends "layout.html" %}

{# Search form: sends screen_name to the "search" view as a GET query parameter. #}
{% block body %}
<form action="{{ url_for('search') }}" class="form-inline" method="get">
    <div class="form-group">
        <label class="sr-only" for="screen_name">screen_name</label>
        {# id matches the label's "for" so the label is associated with this input #}
        <input class="form-control" id="screen_name" name="screen_name" placeholder="@screen_name" type="text"/>
    </div>
    <button class="btn btn-default" type="submit">Search</button>
</form>
{% endblock %}

## layout.html
<!DOCTYPE html>
{# Base layout: child templates extend this file and fill the "subtext" and "body" blocks. #}

<html lang="en">
    <head>
        <!-- http://getbootstrap.com/ -->
        <link href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" rel="stylesheet"/>
        <meta charset="utf-8"/>
        <meta content="initial-scale=1, width=device-width" name="viewport"/>
        <meta http-equiv="X-UA-Compatible" content="IE=edge"/>
        <title>Sentiments</title>
    </head>
    <body>
        <div class="container">
            <div class="page-header">
                <h1>
                    Sentiments
                    {# optional subtitle placed next to the heading; empty unless a child overrides it #}
                    <small>{% block subtext %}{% endblock %}</small>
                </h1>
            </div>
            {# main page content supplied by the child template #}
            {% block body %}{% endblock %}
        </div>
    </body>
</html>

## search.html
{% extends "layout.html" %}

{# Results page: shows the searched handle as the subtitle and the sentiment chart as the body. #}
{% block subtext %}
@{{ screen_name }}
{% endblock %}

{% block body %}
{# "safe" inserts the chart markup unescaped; it is produced by helpers.chart in the search view #}
{{ chart | safe }}
{% endblock %}
	import nltk

	class Analyzer():
	    """Implements sentiment analysis.

	    Tokens are scored against two word lists loaded at construction
	    time: ``positives`` and ``negatives``, each a set of lower-cased words.
	    """

	    def __init__(self, positives, negatives):
	        """Initialize Analyzer.

	        Args:
	            positives: path to a text file of positive words, one per line.
	            negatives: path to a text file of negative words, one per line.

	        Blank lines and lines starting with ";" are ignored in both files.
	        """
	        self.positives = self._load_words(positives)
	        self.negatives = self._load_words(negatives)

	        # score of the most recently analyzed text
	        self.total_score = 0

	        # tokenizer is created on first use and then reused across calls
	        self._tokenizer = None

	    @staticmethod
	    def _load_words(path):
	        """Return the set of words listed in the lexicon file at path."""
	        words = set()
	        # "with" closes the file even if reading raises part-way through
	        with open(path, "r") as lexicon:
	            for line in lexicon:
	                # strip all surrounding whitespace, not just the newline,
	                # so padded entries still match tokens
	                word = line.strip()
	                if word and not word.startswith(";"):
	                    # analyze() lower-cases tokens before lookup
	                    words.add(word.lower())
	        return words

	    def analyze(self, text):
	        """Analyze text for sentiment, returning its score.

	        Every token found in the positive list adds 1, every token found
	        in the negative list subtracts 1, and any other token adds 0.
	        The result is also stored in ``self.total_score``.
	        """
	        if self._tokenizer is None:
	            self._tokenizer = nltk.tokenize.TweetTokenizer()

	        total = 0
	        for token in self._tokenizer.tokenize(text):
	            word = token.lower()
	            if word in self.positives:
	                total += 1
	            elif word in self.negatives:
	                total -= 1

	        self.total_score = total
	        return total
	from flask import Flask, redirect, render_template, request, url_for

	import os
	import sys
	import helpers
	from analyzer import Analyzer

	# Flask application object; the @app.route views that follow register on it
	app = Flask(__name__)

	@app.route("/")
	def index():
	    """Serve the landing page by rendering index.html."""
	    return render_template("index.html")

	@app.route("/search")
	def search():
	    """Chart the sentiment of a user's recent tweets.

	    Reads the ``screen_name`` query parameter, requests up to 100 of that
	    user's tweets, scores each one with Analyzer and renders search.html
	    with a pie chart of the positive / negative / neutral counts.
	    Redirects to the index page when ``screen_name`` is missing or when
	    no tweets could be retrieved.
	    """

	    # validate screen_name
	    screen_name = request.args.get("screen_name", "")
	    if not screen_name:
	        return redirect(url_for("index"))

	    # number of tweets to request
	    count = 100

	    # get screen_name's tweets
	    tweets = helpers.get_user_timeline(screen_name, count)

	    # redirect to index if tweets have not been retrieved successfully
	    # (None is returned for unknown or protected accounts) or if the
	    # timeline is empty; "not tweets" covers both cases
	    if not tweets:
	        return redirect(url_for("index"))

	    # absolute paths to lists of positive and negative words
	    # Dr. Minqing Hu and Prof. Bing Liu of the University of Illinois at Chicago
	    # kindly put together lists of 2006 positive words and 4783 negative words
	    # https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon
	    # Resolved relative to this module rather than sys.path[0], which is
	    # process-wide state (assumes the word lists sit next to this file).
	    base_dir = os.path.dirname(os.path.abspath(__file__))
	    positives = os.path.join(base_dir, "positive-words.txt")
	    negatives = os.path.join(base_dir, "negative-words.txt")

	    positive, negative, neutral = 0.0, 0.0, 0.0

	    # instantiate analyzer
	    analyzer = Analyzer(positives, negatives)

	    # classify each tweet by the sign of its score
	    for tweet in tweets:
	        score = analyzer.analyze(tweet)

	        if score > 0:
	            positive += 1
	        elif score < 0:
	            negative += 1
	        else:
	            neutral += 1

	    # generate chart
	    chart = helpers.chart(positive, negative, neutral)

	    # render results
	    return render_template("search.html", chart=chart, screen_name=screen_name)
	import html
	import os
	import plotly
	import socket

	from twython import Twython
	from twython import TwythonAuthError, TwythonError, TwythonRateLimitError

	def chart(positive, negative, neutral):
	    """Return a pie chart for specified sentiments as HTML.

	    The figure is rendered offline by plotly with output_type="div".
	    """

	    # offline plot
	    # https://plot.ly/python/pie-charts/
	    # https://plot.ly/python/reference/#pie
	    figure = {
	        "data": [
	            {
	                # values are listed in the same order as labels
	                "labels": ["positive", "negative", "neutral"],
	                "hoverinfo": "none",
	                "marker": {
	                    "colors": [
	                        "rgb(0,255,00)",
	                        "rgb(255,0,0)",
	                        "rgb(255,255,0)"
	                    ]
	                },
	                "type": "pie",
	                "values": [positive, negative, neutral]
	            }
	        ],
	        "layout": {
	            "showlegend": True
	        }
	    }
	    return plotly.offline.plot(figure, output_type="div", show_link=False, link_text=False)

	def get_user_timeline(screen_name, count=200):
	    """Return list of most recent tweets posted by screen_name.

	    Args:
	        screen_name: Twitter handle, with or without a leading "@".
	        count: number of tweets to request; must be between 1 and 200.

	    Returns:
	        A list of tweet texts with newlines replaced by spaces and HTML
	        entities unescaped, or None if the account is protected, the
	        lookup returns no user, or Twython reports any other error.

	    Raises:
	        RuntimeError: if count is out of range, API_KEY or API_SECRET is
	            not set or is rejected, or the rate limit has been hit.
	    """

	    # ensure count is valid
	    if count < 1 or count > 200:
	        raise RuntimeError("invalid count")

	    # ensure environment variables are set
	    api_key = os.environ.get("API_KEY")
	    if not api_key:
	        raise RuntimeError("API_KEY not set")
	    api_secret = os.environ.get("API_SECRET")
	    if not api_secret:
	        raise RuntimeError("API_SECRET not set")

	    # use the same "@"-less handle for both API calls
	    handle = screen_name.lstrip("@")

	    # get screen_name's (or @screen_name's) most recent tweets
	    # https://dev.twitter.com/rest/reference/get/users/lookup
	    # https://dev.twitter.com/rest/reference/get/statuses/user_timeline
	    # https://github.com/ryanmcgrath/twython/blob/master/twython/endpoints.py
	    try:
	        twitter = Twython(api_key, api_secret)
	        user = twitter.lookup_user(screen_name=handle)
	        # an empty lookup result is treated like a protected account
	        if not user or user[0]["protected"]:
	            return None
	        tweets = twitter.get_user_timeline(screen_name=handle, count=count)
	        return [html.unescape(tweet["text"].replace("\n", " ")) for tweet in tweets]
	    except TwythonAuthError:
	        raise RuntimeError("invalid API_KEY and/or API_SECRET") from None
	    except TwythonRateLimitError:
	        raise RuntimeError("you've hit a rate limit") from None
	    except TwythonError:
	        return None
	{% extends "layout.html" %}

	{# Search form: sends screen_name to the "search" view as a GET query parameter. #}
	{% block body %}
	<form action="{{ url_for('search') }}" class="form-inline" method="get">
	    <div class="form-group">
	        <label class="sr-only" for="screen_name">screen_name</label>
	        {# id matches the label's "for" so the label is associated with this input #}
	        <input class="form-control" id="screen_name" name="screen_name" placeholder="@screen_name" type="text"/>
	    </div>
	    <button class="btn btn-default" type="submit">Search</button>
	</form>
	{% endblock %}
	<!DOCTYPE html>
	{# Base layout: child templates extend this file and fill the "subtext" and "body" blocks. #}

	<html lang="en">
	<head>
	<!-- http://getbootstrap.com/ -->
	<link href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" rel="stylesheet"/>
	<meta charset="utf-8"/>
	<meta content="initial-scale=1, width=device-width" name="viewport"/>
	<meta http-equiv="X-UA-Compatible" content="IE=edge"/>
	<title>Sentiments</title>
	</head>
	<body>
	<div class="container">
	<div class="page-header">
	<h1>
	Sentiments
	{# optional subtitle placed next to the heading; empty unless a child overrides it #}
	<small>{% block subtext %}{% endblock %}</small>
	</h1>
	</div>
	{# main page content supplied by the child template #}
	{% block body %}{% endblock %}
	</div>
	</body>
	</html>
	{% extends "layout.html" %}

	{# Results page: shows the searched handle as the subtitle and the sentiment chart as the body. #}
	{% block subtext %}
	@{{ screen_name }}
	{% endblock %}

	{% block body %}
	{# filter separator is a plain "|"; "safe" inserts the chart markup unescaped #}
	{{ chart | safe }}
	{% endblock %}