Last active
March 11, 2019 22:22
-
-
Save johnlaudun/5ea8234cc8d6f39b982648704c3824b0 to your computer and use it in GitHub Desktop.
Python script to compare Sentiment Analyses available in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
'''
sentiments.py compares the outputs of the sentiment-analysis modules listed below.
Functionality to be added: normalization and smoothing.
(I haven't implemented the NLTK solution because I don't have classified texts.)
'''
# Imports | |
import matplotlib.pyplot as plt | |
import seaborn # for more appealing plots | |
from nltk import tokenize | |
import numpy as np | |
# Customizations | |
# Module-wide plot defaults: seaborn's "darkgrid" theme and a 12x8 figure size.
seaborn.set_style("darkgrid")
plt.rcParams["figure.figsize"] = (12, 8)
# AFINN | |
def afinn_sentiment(filename):
    """Score every sentence of a text file with the AFINN lexicon.

    The file is read in full, newlines are replaced by spaces, and the
    text is split into sentences with NLTK's ``sent_tokenize``.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        A list holding one ``Afinn.score`` result per sentence, in the
        order the sentences appear in the file.
    """
    # Local import: afinn is only required when this function is called.
    from afinn import Afinn

    afinn = Afinn()
    # Fix: the original opened the undefined name ``my_file`` here, which
    # raised NameError on every call; the parameter is ``filename``.
    with open(filename, "r") as text_file:
        text = text_file.read().replace('\n', ' ')
    sentences = tokenize.sent_tokenize(text)
    return [afinn.score(sentence) for sentence in sentences]
# TextBlob | |
def textblob_sentiment(filename):
    """Return the TextBlob polarity of every sentence in a text file.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        A list with one ``sentiment.polarity`` value per sentence found
        by TextBlob, in document order.
    """
    # Local import: textblob is only required when this function is called.
    from textblob import TextBlob

    with open(filename, "r") as source:
        raw_text = source.read()
    # Newlines are replaced by spaces before the text is handed to TextBlob.
    blob = TextBlob(raw_text.replace('\n', ' '))
    return [sentence.sentiment.polarity for sentence in blob.sentences]
# Indico | |
def indico_sentiment(filename):
    """Score every sentence of a text file with the Indico sentiment API.

    The file is read in full, newlines are replaced by spaces, the text
    is split into sentences with NLTK's ``sent_tokenize``, and the whole
    list of sentences is passed to ``indicoio.sentiment`` in one call.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        Whatever ``indicoio.sentiment`` returns for the list of sentences.
    """
    # Local import: indicoio is only required when this function is called.
    import indicoio

    # Placeholder key; replace it with a real Indico API key before use.
    indicoio.config.api_key = 'your_key_here'
    # Fix: the original opened the undefined name ``my_file`` here, which
    # raised NameError on every call; the parameter is ``filename``.
    with open(filename, "r") as text_file:
        text = text_file.read().replace('\n', ' ')
    sentences = tokenize.sent_tokenize(text)
    return indicoio.sentiment(sentences)
# Moving Average with TA Library | |
def m_average(a_list, window):
    """Return TA-Lib's ``MA`` of *a_list* for the given *window*.

    Args:
        a_list: Sequence of numbers; converted with ``np.asarray``.
        window: Second positional argument handed to ``talib.MA``.

    Returns:
        The result of ``talib.MA``.
    """
    # Local import: talib is only required when this function is called.
    from talib import MA

    return MA(np.asarray(a_list), window)
# Running Mean with Numpy | |
def r_mean(a_list, window):
    """Return the running mean of *a_list* over *window* consecutive items.

    Args:
        a_list: Sequence of numbers; converted with ``np.asarray``.
        window: Number of consecutive items averaged per output value.

    Returns:
        A NumPy array of window means; for ``0 < window <= len(a_list)``
        it has ``len(a_list) - window + 1`` entries.
    """
    values = np.asarray(a_list)
    # A leading 0 lets the difference of two running totals that are
    # ``window`` positions apart equal the sum of one window.
    running_total = np.insert(values, 0, 0).cumsum()
    window_sums = running_total[window:] - running_total[:-window]
    return window_sums / window
# BONUS: Min-Max Function | |
def minmax(a_list):
    """Summarise a sequence by its length, extremes and spread.

    Args:
        a_list: Non-empty sequence of comparable, subtractable values.

    Returns:
        A tuple ``(length, minimum, maximum, maximum - minimum)``.
    """
    count = len(a_list)
    lowest, highest = min(a_list), max(a_list)
    return count, lowest, highest, highest - lowest
# Normalization | |
def normed(a_list, norm_min, norm_max):
    """Linearly rescale *a_list* onto the range [norm_min, norm_max].

    The smallest input value maps to ``norm_min`` and the largest to
    ``norm_max``; everything in between is interpolated linearly.

    Args:
        a_list: Sequence of numbers to rescale.
        norm_min: Lower bound of the target range.
        norm_max: Upper bound of the target range.

    Returns:
        A list of floats, one per input value. An empty input yields an
        empty list. If all input values are equal there is no spread to
        scale, so every value maps to the midpoint of the target range.
    """
    # min()/max() raise ValueError on empty input; return early instead.
    if len(a_list) == 0:
        return []

    old_min = min(a_list)
    old_range = max(a_list) - old_min
    new_range = norm_max - norm_min

    # Constant input would otherwise divide by zero below.
    if old_range == 0:
        midpoint = float(norm_min + new_range / 2)
        return [midpoint] * len(a_list)

    return [
        float((n - old_min) / old_range * new_range + norm_min)
        for n in a_list
    ]
# Plotting | |
def plot_sentiments(filename, annotation):
    """Plot the raw per-sentence scores of each sentiment library.

    Opens a new matplotlib figure and draws one line per library.

    Args:
        filename: Path of the text file to analyse.
        annotation: Text placed on the plot at data coordinates (30, 2).
    """
    plt.figure()
    plt.title("Comparison of Sentiment Libraries")
    # Indico is left out here; indico_sentiment(filename) can be added
    # to this table with the label "Indico".
    scorers = (
        ("Afinn", afinn_sentiment),
        ("TextBlob", textblob_sentiment),
    )
    for label, scorer in scorers:
        plt.plot(scorer(filename), label=label)
    plt.ylabel("Emotional Valence")
    plt.xlabel("Sentence #")
    plt.legend(loc='lower right')
    plt.annotate(annotation, xy=(30, 2))
def avg_plots(filename, window):
    """Plot the running mean of each sentiment library's scores.

    Opens a new matplotlib figure and draws one smoothed line per library,
    using ``r_mean`` for the smoothing.

    Args:
        filename: Path of the text file to analyse.
        window: Window size passed to ``r_mean``.
    """
    plt.figure()
    plt.title("Averaged Sentiment")
    # Only the NumPy running mean of Afinn and TextBlob is drawn; the
    # Indico scores and the TA-Lib moving average (m_average) are left out.
    scorers = (
        ("Afinn NP Running", afinn_sentiment),
        ("TextBlob NP Running", textblob_sentiment),
    )
    for label, scorer in scorers:
        plt.plot(r_mean(scorer(filename), window), label=label)
    plt.ylabel("Emotional Valence")
    plt.xlabel("Sentence #")
    plt.legend(loc='lower center')
def normed_sentiment(filename):
    """Plot each library's scores rescaled to the range [-1.0, 1.0].

    Opens a new matplotlib figure and draws one line per library after
    passing its scores through ``normed``.

    Args:
        filename: Path of the text file to analyse.
    """
    plt.figure()
    plt.title("Comparison of Sentiment Libraries - Normalized")
    # Indico is left out here; it can be added to this table as "Indico".
    scorers = (
        ("Afinn", afinn_sentiment),
        ("TextBlob", textblob_sentiment),
    )
    for label, scorer in scorers:
        plt.plot(normed(scorer(filename), -1.0, 1.0), label=label)
    plt.ylabel("Emotional Valence")
    plt.xlabel("Sentence #")
    plt.legend(loc='lower center')
def normavg_sentiment(filename, window):
    """Plot each library's scores normalized to [-1.0, 1.0], then smoothed.

    Opens a new matplotlib figure; every library's scores go through
    ``normed`` first and ``r_mean`` second before being drawn.

    Args:
        filename: Path of the text file to analyse.
        window: Window size passed to ``r_mean``; also shown in the title.
    """
    plt.figure()
    plt.title("Comparison of Sentiment Libraries - Normalized and then Averaged Window={}".format(window))
    # Indico is left out here; it can be added to this table as "Indico".
    scorers = (
        ("Afinn", afinn_sentiment),
        ("TextBlob", textblob_sentiment),
    )
    for label, scorer in scorers:
        rescaled = normed(scorer(filename), -1.0, 1.0)
        plt.plot(r_mean(rescaled, window), label=label)
    plt.ylabel("Emotional Valence")
    plt.xlabel("Sentence #")
    plt.legend(loc='lower center')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment