Created
September 11, 2020 17:45
-
-
Save liamstrilchuk/c174eb75ccd6899351dc6ae57e3e568b to your computer and use it in GitHub Desktop.
Sentiment Analyzer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import nltk
# Fetch the corpora/lexicon this script depends on (no-op when already
# present in the local NLTK data directory).
nltk.download("subjectivity")
nltk.download("punkt")
nltk.download("vader_lexicon")
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
# Wildcard import supplies mark_negation and extract_unigram_feats used below.
from nltk.sentiment.util import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import math
def main():
    """Score Reddit posts per subreddit with VADER sentiment.

    Reads ``sortedsubreddits.txt`` (JSON list of subreddit names) and
    ``filteredposts.txt`` (JSON list whose items are indexed as
    ``post[0]`` = text, ``post[1]`` = subreddit), then appends each
    subreddit's average compound score (truncated to 3 decimals,
    ascending) to ``results.txt`` and echoes it to stdout.
    """
    # --- NLTK subjectivity / Naive Bayes demo --------------------------------
    # NOTE(review): the classifier trained here is never consulted below; the
    # training is kept so the script's observable behavior (NLTK's training
    # output) is unchanged, but it could likely be deleted entirely.
    n_instances = 100
    subj_docs = [(sent, 'subj')
                 for sent in subjectivity.sents(categories='subj')[:n_instances]]
    obj_docs = [(sent, 'obj')
                for sent in subjectivity.sents(categories='obj')[:n_instances]]
    # 80/20 split; the unused test split from the original has been dropped.
    training_docs = subj_docs[:80] + obj_docs[:80]

    sentim_analyzer = SentimentAnalyzer()
    all_words_neg = sentim_analyzer.all_words(
        [mark_negation(doc) for doc in training_docs])
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats,
                                       unigrams=unigram_feats)
    training_set = sentim_analyzer.apply_features(training_docs)
    sentim_analyzer.train(NaiveBayesClassifier.train, training_set)

    # --- VADER scoring -------------------------------------------------------
    # sub_dict maps subreddit -> [compound-score total, post count].
    with open("sortedsubreddits.txt", "r") as f:
        sub_dict = {name: [0, 0] for name in json.loads(f.read())}

    with open("filteredposts.txt", "r") as f:
        posts = json.loads(f.read())

    sid = SentimentIntensityAnalyzer()
    for post in posts:
        subreddit = post[1]
        if subreddit not in sub_dict:
            continue
        # The original iterated sorted(scores)[:1]; sorted VADER keys are
        # ('compound', 'neg', 'neu', 'pos'), so that was exactly the
        # 'compound' score — read it directly.
        sub_dict[subreddit][0] += sid.polarity_scores(post[0])["compound"]
        sub_dict[subreddit][1] += 1

    # Average per subreddit, truncated (not rounded) to 3 decimal places.
    averages = {}
    for subreddit, (total, count) in sub_dict.items():
        # Skip subreddits with no matched posts. The exact-zero total is
        # also skipped — preserved from the original, presumably meant as
        # another "no data" signal (it drops perfectly-neutral subs too).
        if total == 0 or count == 0:
            continue
        averages[subreddit] = math.floor(total / count * 1000) / 1000

    # Emit ascending by average score. Open the output file once — the
    # original reopened it in append mode for every single line.
    with open("results.txt", "a") as f:
        for subreddit in sorted(averages, key=averages.get):
            f.write(f"{subreddit}: {averages[subreddit]}\n")
            print(f"{subreddit}: {averages[subreddit]}")
# Run the analysis only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment