Instantly share code, notes, and snippets.

# soodoku/basic_sentiment_analysis.py Last active Nov 14, 2015

Basic sentiment analysis with AFINN or custom word database
 ''' Basic Sentiment Analysis Builds on: https://finnaarupnielsen.wordpress.com/2011/06/20/simplest-sentiment-analysis-in-python-with-af/ Utilizes AFINN or a custom sentiment db Example Snippets at end from: https://code.google.com/p/sentana/wiki/ExampleSentiments ''' import re import math import os # AFINN-111 is as of June 2011 the most recent version of AFINN. Replace with newer. filenameAFINN = os.path.dirname(__file__) + '/AFINN/AFINN-111.txt' afinn = dict(map(lambda (w, s): (w, int(s)), [ ws.strip().split('\t') for ws in open(filenameAFINN) ])) # Word splitter pattern pattern_split = re.compile(r"\W+") def sentiment_AFINN(text): """ Returns a float for sentiment strength based on the input text. Positive values are positive valence, negative value are negative valence. """ words = pattern_split.split(text.lower()) sentiments = map(lambda word: afinn.get(word, 0), words) if sentiments: # How should you weight the individual word sentiments? # You could do N, sqrt(N) or 1 for example. Here I use sqrt(N) _sentiment = float(sum(sentiments))/math.sqrt(len(sentiments)) else: _sentiment = 0 return _sentiment # Using custom WORDDB filenameWORDDB = os.path.dirname(__file__) + '/sentimentworddb.txt' worddb = dict(map(lambda (w, s): (w, int(s) if int(s) != 0 else -1), [ ws.strip().split('\t') for ws in open(filenameWORDDB) ])) re_str = "(%s)([^ ]*)" % '|'.join([re.escape(w.replace('*', '')) for w in worddb if w.endswith('*')]) re_str += "|(%s)[\s\.\,\;\?\!]" % '|'.join([re.escape(w) for w in worddb if not w.endswith('*')]) re_worddb = re.compile(re_str) def sentiment_WORDDB(text): """ Returns a float for sentiment strength based on the input text. Positive values are positive valence, negative value are negative valence. """ sentiments = [] for s in re_worddb.finditer(text): if s.group(1): sentiments.append(worddb[s.group(1) + '*']) elif s.group(3): sentiments.append(worddb[s.group(3)]) return sum(sentiments) if __name__ == "__main__": print("========== AFINN Test cases ==========") print(sentiment_AFINN("ibm is not going at cloud alone. We have an ecosystem of partners helping us.")) print(sentiment_AFINN("I have an iPhone, but I am not really feeling very happy about the iPhone.")) print(sentiment_AFINN("I love Macintosh!")) print(sentiment_AFINN("I hate microsoft excel. I'm about to punch this computer!!!! ")) print(sentiment_AFINN("I really love my iPhone, but the reception here is very bad.")) print(sentiment_AFINN("I'm afraid, I cannot corruption")) print("========== WORDDB Test cases ==========") print(sentiment_WORDDB("ibm is not going at cloud alone. We have an ecosystem of partners helping us.")) print(sentiment_WORDDB("I have an iPhone, but I am not really feeling very happy about the iPhone.")) print(sentiment_WORDDB("I love Macintosh!")) print(sentiment_WORDDB("I hate microsoft excel. I'm about to punch this computer!!!! ")) print(sentiment_WORDDB("I really love my iPhone, but the reception here is very bad.")) print(sentiment_WORDDB("I'm afraid, I cannot corruption"))
to join this conversation on GitHub. Already have an account? Sign in to comment