# soodoku/basic_sentiment_analysis.py

## basic_sentiment_analysis.py
'''
Basic Sentiment Analysis
Builds on:
    https://finnaarupnielsen.wordpress.com/2011/06/20/simplest-sentiment-analysis-in-python-with-af/

    Utilizes AFINN or a custom sentiment db

    Example Snippets at end from: https://code.google.com/p/sentana/wiki/ExampleSentiments
'''

import re
import math
import os


# AFINN-111 was the most recent version of AFINN as of June 2011; replace it
# with a newer release when one is available.
# The path is anchored on the absolute location of this module:
# os.path.dirname(__file__) is '' when __file__ is a bare file name, and plain
# concatenation would then produce the root-anchored '/AFINN/AFINN-111.txt'.
filenameAFINN = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'AFINN', 'AFINN-111.txt')

# Each line of the file is "<word>\t<integer score>". A line that does not
# split into exactly two tab-separated fields raises ValueError.
# The file is opened in a ``with`` block so the handle is closed after loading.
with open(filenameAFINN) as _afinn_file:
    afinn = {
        word: int(score)
        for word, score in (line.strip().split('\t') for line in _afinn_file)
    }

# Word splitter pattern: splits on runs of non-word characters.
pattern_split = re.compile(r"\W+")

def sentiment_AFINN(text):
    """
    Return the AFINN sentiment strength of ``text`` as a float.

    The text is lower-cased and split on runs of non-word characters. Each
    word is looked up in the module-level ``afinn`` table (words that are not
    listed score 0); the scores are summed and divided by sqrt(N), where N is
    the number of words.

    Positive values are positive valence, negative values are negative
    valence. Text that contains no words scores 0.0.
    """
    # re.split() yields empty strings when the text starts or ends with a
    # separator (e.g. trailing punctuation). Drop them so they are not counted
    # as words in N and so that word-less text reaches the zero branch below.
    words = [word for word in pattern_split.split(text.lower()) if word]
    # A real list (rather than a lazy map object) is required for the truth
    # test and len() below to work on Python 3 as well as Python 2.
    sentiments = [afinn.get(word, 0) for word in words]
    if sentiments:
        # How should you weight the individual word sentiments?
        # You could do N, sqrt(N) or 1 for example. Here sqrt(N) is used.
        _sentiment = float(sum(sentiments)) / math.sqrt(len(sentiments))
    else:
        _sentiment = 0.0
    return _sentiment

# Using custom WORDDB
# The path is anchored on the absolute location of this module:
# os.path.dirname(__file__) is '' when __file__ is a bare file name, and plain
# concatenation would then produce the root-anchored '/sentimentworddb.txt'.
filenameWORDDB = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'sentimentworddb.txt')

# Each line of the file is "<word>\t<integer score>". A score of 0 is stored
# as -1. A line that does not split into exactly two tab-separated fields
# raises ValueError.
# The file is opened in a ``with`` block so the handle is closed after loading.
with open(filenameWORDDB) as _worddb_file:
    worddb = {
        word: int(score) or -1
        for word, score in (line.strip().split('\t') for line in _worddb_file)
    }

# Group 1: stems of the entries ending in '*', followed by any run of
# non-space characters (group 2).
re_str = r"(%s)([^ ]*)" % '|'.join(
    re.escape(w.replace('*', '')) for w in worddb if w.endswith('*'))
# Group 3: the remaining entries, which must be followed by whitespace or one
# of . , ; ? !
# NOTE(review): such an entry at the very end of the text, with no trailing
# delimiter, is therefore not matched - confirm whether that is intended.
re_str += r"|(%s)[\s\.\,\;\?\!]" % '|'.join(
    re.escape(w) for w in worddb if not w.endswith('*'))
re_worddb = re.compile(re_str)

def sentiment_WORDDB(text):
    """
    Score ``text`` against the custom word database.

    Every match of the module-level ``re_worddb`` pattern contributes the
    ``worddb`` score of the entry it matched; the contributions are added up
    and the total is returned (0 when nothing matches).

    Positive values are positive valence, negative values are negative valence.
    """
    total = 0
    for match in re_worddb.finditer(text):
        stem = match.group(1)
        exact = match.group(3)
        if stem:
            # Wildcard entries are keyed in worddb with their trailing '*'.
            total += worddb[stem + '*']
        elif exact:
            total += worddb[exact]

    return total

if __name__ == "__main__":
    # Demo run: score the same sample snippets with both analysers and print
    # one score per line under each heading.
    _samples = (
        "ibm is not going at cloud alone. We have an ecosystem of partners helping us.",
        "I have an iPhone, but I am not really feeling very happy about the iPhone.",
        "I love Macintosh!",
        "I hate microsoft excel. I'm about to punch this computer!!!! ",
        "I really love my iPhone, but the reception here is very bad.",
        "I'm afraid, I cannot corruption",
    )

    print("========== AFINN Test cases ==========")
    for _sample in _samples:
        print(sentiment_AFINN(_sample))

    print("========== WORDDB Test cases ==========")
    for _sample in _samples:
        print(sentiment_WORDDB(_sample))
	'''
	Basic Sentiment Analysis
	Builds on:
	https://finnaarupnielsen.wordpress.com/2011/06/20/simplest-sentiment-analysis-in-python-with-af/

	Utilizes AFINN or a custom sentiment db

	Example Snippets at end from: https://code.google.com/p/sentana/wiki/ExampleSentiments
	'''

	import re
	import math
	import os


	# AFINN-111 was the most recent version of AFINN as of June 2011; replace it
	# with a newer release when one is available.
	# The path is anchored on the absolute location of this module:
	# os.path.dirname(__file__) is '' when __file__ is a bare file name, and plain
	# concatenation would then produce the root-anchored '/AFINN/AFINN-111.txt'.
	filenameAFINN = os.path.join(
	    os.path.dirname(os.path.abspath(__file__)), 'AFINN', 'AFINN-111.txt')

	# Each line of the file is "<word>\t<integer score>". A line that does not
	# split into exactly two tab-separated fields raises ValueError.
	# The file is opened in a ``with`` block so the handle is closed after loading.
	with open(filenameAFINN) as _afinn_file:
	    afinn = {
	        word: int(score)
	        for word, score in (line.strip().split('\t') for line in _afinn_file)
	    }

	# Word splitter pattern: splits on runs of non-word characters.
	pattern_split = re.compile(r"\W+")

	def sentiment_AFINN(text):
	    """
	    Return the AFINN sentiment strength of ``text`` as a float.

	    The text is lower-cased and split on runs of non-word characters. Each
	    word is looked up in the ``afinn`` table (words that are not listed
	    score 0); the scores are summed and divided by sqrt(N), where N is the
	    number of words.

	    Positive values are positive valence, negative values are negative
	    valence. Text that contains no words scores 0.0.
	    """
	    # re.split() yields empty strings when the text starts or ends with a
	    # separator (e.g. trailing punctuation). Drop them so they are not
	    # counted as words in N and so that word-less text reaches the zero
	    # branch below.
	    words = [word for word in pattern_split.split(text.lower()) if word]
	    # A real list (rather than a lazy map object) is required for the truth
	    # test and len() below to work on Python 3 as well as Python 2.
	    sentiments = [afinn.get(word, 0) for word in words]
	    if sentiments:
	        # How should you weight the individual word sentiments?
	        # You could do N, sqrt(N) or 1 for example. Here sqrt(N) is used.
	        _sentiment = float(sum(sentiments)) / math.sqrt(len(sentiments))
	    else:
	        _sentiment = 0.0
	    return _sentiment

	# Using custom WORDDB
	# The path is anchored on the absolute location of this module:
	# os.path.dirname(__file__) is '' when __file__ is a bare file name, and plain
	# concatenation would then produce the root-anchored '/sentimentworddb.txt'.
	filenameWORDDB = os.path.join(
	    os.path.dirname(os.path.abspath(__file__)), 'sentimentworddb.txt')

	# Each line of the file is "<word>\t<integer score>". A score of 0 is stored
	# as -1. A line that does not split into exactly two tab-separated fields
	# raises ValueError.
	# The file is opened in a ``with`` block so the handle is closed after loading.
	with open(filenameWORDDB) as _worddb_file:
	    worddb = {
	        word: int(score) or -1
	        for word, score in (line.strip().split('\t') for line in _worddb_file)
	    }

	# Group 1: stems of the entries ending in '*' (the '*' is stripped before
	# escaping), followed by any run of non-space characters (group 2).
	# The alternatives are joined with a plain '|'; an escaped '\|' would match a
	# literal pipe character instead of separating the alternatives.
	re_str = r"(%s)([^ ]*)" % '|'.join(
	    re.escape(w.replace('*', '')) for w in worddb if w.endswith('*'))
	# Group 3: the remaining entries, which must be followed by whitespace or one
	# of . , ; ? !
	# NOTE(review): such an entry at the very end of the text, with no trailing
	# delimiter, is therefore not matched - confirm whether that is intended.
	re_str += r"|(%s)[\s\.\,\;\?\!]" % '|'.join(
	    re.escape(w) for w in worddb if not w.endswith('*'))
	re_worddb = re.compile(re_str)

	def sentiment_WORDDB(text):
	    """
	    Score ``text`` against the custom word database.

	    Every match of the ``re_worddb`` pattern contributes the ``worddb``
	    score of the entry it matched; the contributions are added up and the
	    total is returned (0 when nothing matches).

	    Positive values are positive valence, negative values are negative
	    valence.
	    """
	    sentiments = []
	    for s in re_worddb.finditer(text):
	        if s.group(1):
	            # Wildcard entries are keyed in worddb with their trailing '*'.
	            sentiments.append(worddb[s.group(1) + '*'])
	        elif s.group(3):
	            sentiments.append(worddb[s.group(3)])

	    return sum(sentiments)

	if __name__ == "__main__":
	    # Demo run: score the same sample snippets with both analysers and print
	    # one score per line under each heading.
	    _samples = (
	        "ibm is not going at cloud alone. We have an ecosystem of partners helping us.",
	        "I have an iPhone, but I am not really feeling very happy about the iPhone.",
	        "I love Macintosh!",
	        "I hate microsoft excel. I'm about to punch this computer!!!! ",
	        "I really love my iPhone, but the reception here is very bad.",
	        "I'm afraid, I cannot corruption",
	    )

	    print("========== AFINN Test cases ==========")
	    for _sample in _samples:
	        print(sentiment_AFINN(_sample))

	    print("========== WORDDB Test cases ==========")
	    for _sample in _samples:
	        print(sentiment_WORDDB(_sample))