Created
April 3, 2020 10:44
-
-
Save 2torus/350000eb50d5eb1da5de0f224e278285 to your computer and use it in GitHub Desktop.
AFINN sentiment analysis - conversion to Python 3 of the original
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# (conversion to python 3 of https://gist.github.com/fnielsen/4183541) | |
# (originally entered at https://gist.github.com/1035399) | |
# | |
# License: GPLv3 | |
# | |
# To download the AFINN word list do: | |
# wget http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/6010/zip/imm6010.zip | |
# unzip imm6010.zip | |
# | |
# Note that for pedagogic reasons there is a UNICODE/UTF-8 error in the code. | |
import math | |
import re | |
import sys | |
#reload(sys) | |
#sys.setdefaultencoding('utf-8') | |
# AFINN-111 is as of June 2011 the most recent version of AFINN
filenameAFINN = 'AFINN/AFINN-111.txt'

# Build the lookup table mapping each AFINN entry to its integer valence.
# Every data line has the form "<entry>\t<score>".
afinn = {}
# The word list is read explicitly as UTF-8 so that non-ASCII entries do not
# depend on the platform's locale encoding, and the `with` block guarantees
# the file handle is closed once loading is finished.
with open(filenameAFINN, encoding='utf-8') as afinn_file:
    for line in afinn_file:
        stripped = line.strip()
        if not stripped:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = stripped.split('\t')
        afinn[fields[0]] = int(fields[1])
# Word splitter pattern: any run of non-word characters separates tokens.
pattern_split = re.compile(r"\W+")


def sentiment(text, lexicon=None):
    """
    Return a float for sentiment strength based on the input text.

    Positive values are positive valence, negative values are negative
    valence.  The text is lower-cased and split on runs of non-word
    characters; every token is looked up in the word list (tokens that are
    not listed score 0) and the summed score is divided by sqrt(N), where N
    is the number of tokens.

    Arguments:
    text    -- the string to score.
    lexicon -- optional mapping from lower-case word to numeric valence.
               When omitted, the module-level ``afinn`` dictionary is used.

    Returns 0.0 when the text contains no tokens at all.
    """
    if lexicon is None:
        lexicon = afinn

    # re.split() yields empty strings when the text starts or ends with a
    # separator (or is empty).  Drop them so that punctuation such as a
    # trailing "!" does not inflate N and dilute the score.
    words = [word for word in pattern_split.split(text.lower()) if word]
    if not words:
        return 0.0

    # How should you weight the individual word sentiments?
    # You could do N, sqrt(N) or 1 for example. Here sqrt(N) is used.
    total = sum(lexicon.get(word, 0) for word in words)
    return float(total) / math.sqrt(len(words))
if __name__ == '__main__':
    # Small demonstration: score each sample sentence and print the score
    # (width 6, two decimals) followed by the sentence itself.
    examples = (
        # Single sentence example:
        "Finn is stupid and idiotic",
        # No negation and booster words handled in this approach
        "Finn is only a tiny bit stupid and not idiotic",
    )
    for text in examples:
        print("%6.2f %s" % (sentiment(text), text))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment