Skip to content

Instantly share code, notes, and snippets.

@soodoku
Last active November 14, 2015 05:51
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save soodoku/22e4cff2eb6a05be3c0d to your computer and use it in GitHub Desktop.
Save soodoku/22e4cff2eb6a05be3c0d to your computer and use it in GitHub Desktop.
Basic sentiment analysis with AFINN or custom word database
'''
Basic Sentiment Analysis
Builds on:
https://finnaarupnielsen.wordpress.com/2011/06/20/simplest-sentiment-analysis-in-python-with-af/
Utilizes AFINN or a custom sentiment db
Example Snippets at end from: https://code.google.com/p/sentana/wiki/ExampleSentiments
'''
import re
import math
import os
# AFINN-111 was the most recent AFINN release as of June 2011; point this at a
# newer list if one is available.
# The path is resolved against the absolute module directory: a bare
# dirname(__file__) can be '' for a relatively-invoked script, which turned
# the previous string concatenation into '/AFINN/AFINN-111.txt' at the
# filesystem root.
filenameAFINN = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'AFINN', 'AFINN-111.txt')

# Map each term to its integer score. Every non-blank line is parsed as
# "<term>\t<score>"; blank lines are skipped instead of breaking the unpack.
# The context manager closes the file once the table is built.
with open(filenameAFINN) as _afinn_file:
    afinn = {
        w: int(s)
        for w, s in (
            ws.strip().split('\t') for ws in _afinn_file if ws.strip()
        )
    }

# Word splitter pattern: splits on runs of non-word characters.
pattern_split = re.compile(r"\W+")
def sentiment_AFINN(text):
    """
    Return a float sentiment strength for ``text`` based on the AFINN list.

    The text is lower-cased and split on runs of non-word characters. Each
    token is looked up in ``afinn`` (tokens not in the list score 0) and the
    summed score is divided by sqrt(N), where N is the number of tokens.

    Positive values are positive valence, negative values are negative
    valence. Text that contains no tokens scores 0.0.
    """
    # Splitting on \W+ yields '' at either end when the text starts or ends
    # with punctuation or whitespace; drop those so they do not inflate N.
    words = [word for word in pattern_split.split(text.lower()) if word]
    # Build a real list: on Python 3 ``map`` returns an iterator, which is
    # always truthy and does not support len().
    sentiments = [afinn.get(word, 0) for word in words]
    if not sentiments:
        return 0.0
    # How should the individual word sentiments be weighted?
    # N, sqrt(N) or 1 are all options; sqrt(N) is used here.
    return float(sum(sentiments)) / math.sqrt(len(sentiments))
# Using custom WORDDB
# The path is resolved against the absolute module directory: a bare
# dirname(__file__) can be '' for a relatively-invoked script, which turned
# the previous string concatenation into '/sentimentworddb.txt' at the
# filesystem root.
filenameWORDDB = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'sentimentworddb.txt')

# Map each term to its integer score. Every non-blank line is parsed as
# "<term>\t<score>"; a term ending in '*' is a prefix (wildcard) entry.
# A score of 0 is stored as -1.
with open(filenameWORDDB) as _worddb_file:
    worddb = {
        w: int(s) or -1
        for w, s in (
            ws.strip().split('\t') for ws in _worddb_file if ws.strip()
        )
    }

# An empty alternation "()" would match the empty string at every position
# and shadow the other branch, so fall back to a pattern that never matches.
_NEVER_MATCH = r'(?!)'
_wildcard_stems = '|'.join(
    re.escape(w.replace('*', '')) for w in worddb if w.endswith('*'))
_exact_terms = '|'.join(
    re.escape(w) for w in worddb if not w.endswith('*'))

# Group 1: wildcard stem; group 2: the rest of the token up to the next space.
re_str = r"(%s)([^ ]*)" % (_wildcard_stems or _NEVER_MATCH)
# Group 3: exact term, which must be followed by whitespace, one of . , ; ? !
# or the end of the text (so a term that is the last word still counts).
re_str += r"|(%s)(?:[\s.,;?!]|$)" % (_exact_terms or _NEVER_MATCH)
re_worddb = re.compile(re_str)
def sentiment_WORDDB(text):
    """
    Score ``text`` against the custom word database.

    Every match of ``re_worddb`` in the text contributes the score stored in
    ``worddb`` for the matched entry; the return value is the sum of those
    scores (0 when nothing matches). Positive values are positive valence,
    negative values are negative valence.
    """
    total = 0
    for match in re_worddb.finditer(text):
        stem = match.group(1)
        exact = match.group(3)
        if stem:
            # Wildcard entries are keyed in worddb with their trailing '*'.
            total += worddb[stem + '*']
        elif exact:
            total += worddb[exact]
    return total
if __name__ == "__main__":
    # Demo: score the same sample snippets with both scorers and print the
    # results, one per line, under a header for each scorer.
    samples = (
        "ibm is not going at cloud alone. We have an ecosystem of partners helping us.",
        "I have an iPhone, but I am not really feeling very happy about the iPhone.",
        "I love Macintosh!",
        "I hate microsoft excel. I'm about to punch this computer!!!! ",
        "I really love my iPhone, but the reception here is very bad.",
        "I'm afraid, I cannot corruption",
    )

    print("========== AFINN Test cases ==========")
    for snippet in samples:
        print(sentiment_AFINN(snippet))

    print("========== WORDDB Test cases ==========")
    for snippet in samples:
        print(sentiment_WORDDB(snippet))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment