Created
September 8, 2017 03:24
-
-
Save OxiBo/f58d6c206d542fa8ae39b34ed6aeb39f to your computer and use it in GitHub Desktop.
CS50 pset6 Sentiments/smile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
class Analyzer():
    """Implements sentiment analysis.

    Text is scored by counting tokens found in a positive word list
    (+1 each) and in a negative word list (-1 each).
    """

    def __init__(self, positives, negatives):
        """Initialize Analyzer.

        Args:
            positives: Path to a text file of positive words, one per line.
            negatives: Path to a text file of negative words, one per line.

        Raises:
            OSError: If either file cannot be opened.
        """
        # load both word lists in memory as sets for O(1) membership tests
        self.positives = self._load_words(positives)
        self.negatives = self._load_words(negatives)

    @staticmethod
    def _load_words(path):
        """Return the set of words stored in the file at ``path``.

        Blank lines and lines starting with ";" (comments) are skipped.
        Surrounding whitespace is stripped so that an entry with trailing
        spaces or tabs still matches a token.
        """
        words = set()
        # the context manager closes the file even if reading raises
        with open(path, "r") as lexicon:
            for line in lexicon:
                word = line.strip()
                if word and not word.startswith(";"):
                    words.add(word)
        return words

    def analyze(self, text):
        """Analyze text for sentiment, returning its score.

        Args:
            text: The string to tokenize and score.

        Returns:
            The number of tokens found in the positive list minus the number
            found in the negative list. Tokens are lowercased before lookup.
            The same value is also stored in ``self.total_score``.
        """
        # kept as an attribute because callers may read it after the call
        self.total_score = 0

        # instantiate TweetTokenizer and tokenize given text
        tokenizer = nltk.tokenize.TweetTokenizer()

        for token in tokenizer.tokenize(text):
            # lowercase once per token instead of once per lookup
            word = token.lower()
            if word in self.positives:
                self.total_score += 1
            elif word in self.negatives:
                self.total_score -= 1

        return self.total_score
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# a program that categorizes a word as positive or negative | |
import os | |
import sys | |
from analyzer import Analyzer | |
from termcolor import colored | |
def main():
    """Print a colored smiley reflecting the sentiment of one word.

    Expects exactly one command-line argument (the word to classify) and
    exits with a usage message otherwise. Prints ":)" in green for a
    positive score, ":(" in red for a negative score, and ":|" in yellow
    for a score of zero.
    """
    arguments = sys.argv

    # guard clause: exactly one word must be supplied
    if len(arguments) != 2:
        sys.exit("Usage: ./smile word")

    # The word lists are looked up in sys.path[0].
    # Dr. Minqing Hu and Prof. Bing Liu of the University of Illinois at
    # Chicago kindly put together lists of 2006 positive words and 4783
    # negative words:
    # https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon
    base_dir = sys.path[0]
    positive_path, negative_path = (
        os.path.join(base_dir, filename)
        for filename in ("positive-words.txt", "negative-words.txt")
    )

    # build the analyzer and score the word in one step
    score = Analyzer(positive_path, negative_path).analyze(arguments[1])

    # choose the face and its color, then print once
    if score > 0.0:
        face, color = ":)", "green"
    elif score < 0.0:
        face, color = ":(", "red"
    else:
        face, color = ":|", "yellow"
    print(colored(face, color))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment