Created
September 8, 2017 03:33
-
-
Save OxiBo/288fb796c0f79f5b835875400670eefc to your computer and use it in GitHub Desktop.
CS50 pset6 Sentiments - Implement a website that generates a pie chart categorizing a user’s tweets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
class Analyzer():
    """Implements sentiment analysis.

    A text is scored by tokenizing it and adding 1 for every token found in
    the positive-word list and subtracting 1 for every token found in the
    negative-word list.
    """

    def __init__(self, positives, negatives):
        """Initialize Analyzer.

        Args:
            positives: path to a text file of positive words, one per line.
            negatives: path to a text file of negative words, one per line.

        In both files, blank lines and lines starting with ";" are skipped.
        """
        # load list of positive words in memory
        self.positives = self._load_words(positives)

        # load list of negative words in memory
        self.negatives = self._load_words(negatives)

    @staticmethod
    def _load_words(path):
        """Return the set of words listed in the file at path."""
        words = set()
        # `with` guarantees the file is closed even if reading fails part-way
        with open(path, "r") as lexicon:
            for line in lexicon:
                # strip all surrounding whitespace, not just the newline, so
                # an entry such as "happy " can still match a token
                word = line.strip()
                if word and not word.startswith(";"):
                    words.add(word)
        return words

    def analyze(self, text):
        """Analyze text for sentiment, returning its score.

        Args:
            text: the string to score.

        Returns:
            The number of positive tokens minus the number of negative
            tokens; tokens in neither list contribute nothing. Matching is
            done on the lower-cased token.
        """
        # instantiate TweetTokenizer and tokenize given text
        tokenizer = nltk.tokenize.TweetTokenizer()

        total = 0
        for token in tokenizer.tokenize(text):
            word = token.lower()
            if word in self.positives:
                total += 1
            elif word in self.negatives:
                total -= 1

        # the score of the last analyzed text stays available as an attribute,
        # as it was before
        self.total_score = total
        return total
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flask import Flask, redirect, render_template, request, url_for | |
import os | |
import sys | |
import helpers | |
from analyzer import Analyzer | |
# the Flask application object that the routes in this file are registered on
app = Flask(__name__)


@app.route("/")
def index():
    """Serve the landing page."""
    landing_page = render_template("index.html")
    return landing_page
@app.route("/search")
def search():
    """Render a pie chart of the sentiment of a user's recent tweets.

    Reads the ``screen_name`` query parameter. Redirects to the index page
    when the parameter is missing or when no tweets could be retrieved;
    otherwise classifies each tweet as positive, negative or neutral and
    renders ``search.html`` with the resulting chart.
    """
    # validate screen_name; a leading "@" is dropped so that "@name" and
    # "name" are equivalent and the "@" is not displayed twice on the
    # results page
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # amount of tweets to get
    count = 100

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, count)

    # redirect to index if tweets have not been retrieved successfully
    # (None or an empty list: there is nothing to chart)
    if not tweets:
        return redirect(url_for("index"))

    # absolute paths to list of positive and negative words
    # Dr. Minqing Hu and Prof. Bing Liu of the University of Illinois at Chicago
    # kindly put together lists of 2006 positive words and 4783 negative words
    # https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # count how many tweets fall into each category
    positive, negative, neutral = 0.0, 0.0, 0.0
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import html | |
import os | |
import plotly | |
import socket | |
from twython import Twython | |
from twython import TwythonAuthError, TwythonError, TwythonRateLimitError | |
def chart(positive, negative, neutral):
    """Build a pie chart of the three sentiment counts and return it as HTML.

    The figure is rendered offline by plotly as a ``div``.
    """
    # https://plot.ly/python/pie-charts/
    # https://plot.ly/python/reference/#pie

    # one colour per label, in the same order as the labels below
    slice_colors = ["rgb(0,255,00)", "rgb(255,0,0)", "rgb(255,255,0)"]

    pie_trace = {
        "labels": ["positive", "negative", "neutral"],
        "hoverinfo": "none",
        "marker": {"colors": slice_colors},
        "type": "pie",
        "values": [positive, negative, neutral],
    }
    figure = {
        "data": [pie_trace],
        "layout": {"showlegend": True},
    }

    # offline plot
    return plotly.offline.plot(figure, output_type="div", show_link=False, link_text=False)
def get_user_timeline(screen_name, count=200):
    """Return list of most recent tweets posted by screen_name.

    Args:
        screen_name: the account to read, with or without a leading "@".
        count: how many tweets to request; must be between 1 and 200.

    Returns:
        A list of tweet texts with newlines replaced by spaces and HTML
        entities unescaped, or None if the account is protected or the
        Twitter request fails.

    Raises:
        RuntimeError: if count is out of range, if API_KEY or API_SECRET is
            not set in the environment, if the credentials are rejected, or
            if a rate limit has been hit.
    """
    # ensure count is valid
    if count < 1 or count > 200:
        raise RuntimeError("invalid count")

    # ensure environment variables are set
    api_key = os.environ.get("API_KEY")
    if not api_key:
        raise RuntimeError("API_KEY not set")
    api_secret = os.environ.get("API_SECRET")
    if not api_secret:
        raise RuntimeError("API_SECRET not set")

    # normalise once so that both API calls below receive the same handle
    handle = screen_name.lstrip("@")

    # get screen_name's (or @screen_name's) most recent tweets
    # https://dev.twitter.com/rest/reference/get/users/lookup
    # https://dev.twitter.com/rest/reference/get/statuses/user_timeline
    # https://github.com/ryanmcgrath/twython/blob/master/twython/endpoints.py
    try:
        twitter = Twython(api_key, api_secret)
        user = twitter.lookup_user(screen_name=handle)
        if user[0]["protected"]:
            return None
        tweets = twitter.get_user_timeline(screen_name=handle, count=count)
        return [html.unescape(tweet["text"].replace("\n", " ")) for tweet in tweets]
    except TwythonAuthError:
        raise RuntimeError("invalid API_KEY and/or API_SECRET") from None
    except TwythonRateLimitError:
        raise RuntimeError("you've hit a rate limit") from None
    except TwythonError:
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{% extends "layout.html" %}

{# Landing page: a single-field form that submits screen_name to the search route via GET. #}

{% block body %}
    <form action="{{ url_for('search') }}" class="form-inline" method="get">
        <div class="form-group">
            <label class="sr-only" for="screen_name">screen_name</label>
            <input class="form-control" id="screen_name" name="screen_name" placeholder="@screen_name" type="text"/>
        </div>
        <button class="btn btn-default" type="submit">Search</button>
    </form>
{% endblock %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>

<html lang="en">
    <head>

        <!-- these meta tags come first in the head; other head content follows them -->
        <meta charset="utf-8"/>
        <meta http-equiv="X-UA-Compatible" content="IE=edge"/>
        <meta content="initial-scale=1, width=device-width" name="viewport"/>

        <!-- http://getbootstrap.com/ -->
        <link href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" rel="stylesheet"/>

        <title>Sentiments</title>

    </head>
    <body>
        <div class="container">
            <div class="page-header">
                <h1>
                    Sentiments
                    <small>{% block subtext %}{% endblock %}</small>
                </h1>
            </div>
            {% block body %}{% endblock %}
        </div>
    </body>
</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{% extends "layout.html" %}

{# Results page: shows the searched handle next to the title and the chart markup in the body. #}

{% block subtext %}
    @{{ screen_name }}
{% endblock %}

{# chart is HTML produced server-side, so it is marked safe to bypass autoescaping #}
{% block body %}
    {{ chart|safe }}
{% endblock %}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment