# Sentiment Analyzer
# Source: GitHub Gist c174eb75ccd6899351dc6ae57e3e568b by @liamstrilchuk
# Created September 11, 2020 17:45
# (Scraped GitHub page chrome converted to comments so the file parses.)
import json
import nltk
# Fetch the NLTK data sets used below (no-op if already downloaded):
# the "subjectivity" corpus, the "punkt" tokenizer models, and the
# VADER sentiment lexicon. Requires network access on first run.
nltk.download("subjectivity")
nltk.download("punkt")
nltk.download("vader_lexicon")
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
# Wildcard import supplies mark_negation and extract_unigram_feats used in main().
from nltk.sentiment.util import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import math
def main():
    """Score Reddit posts per subreddit with VADER sentiment and report averages.

    Reads "sortedsubreddits.txt" (a JSON list of subreddit names) and
    "filteredposts.txt" (a JSON list whose items are indexable as
    [text, subreddit, ...]), computes the average VADER "compound" score per
    subreddit, then appends the results — sorted ascending by score — to
    "results.txt" as "name: score" lines while echoing each line to stdout.

    Also trains an NLTK Naive Bayes subjectivity classifier on the
    subjectivity corpus (see NOTE below).
    """
    # --- Subjectivity classifier (NLTK sentiment tutorial section). -----------
    # NOTE(review): `classifier` and `test_set` are never used afterwards; this
    # section reproduces the NLTK sentiment-analysis HOWTO and could be removed
    # without affecting the output.
    n_instances = 100
    subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
    obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
    # 80/20 train/test split within each category.
    training_docs = subj_docs[:80] + obj_docs[:80]
    testing_docs = subj_docs[80:100] + obj_docs[80:100]
    sentim_analyzer = SentimentAnalyzer()
    all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
    training_set = sentim_analyzer.apply_features(training_docs)
    test_set = sentim_analyzer.apply_features(testing_docs)  # unused, kept from original
    classifier = sentim_analyzer.train(NaiveBayesClassifier.train, training_set)

    # --- Per-subreddit accumulation: {name: [compound_sum, post_count]}. ------
    # Files opened via context managers (the original left close() calls to
    # manual bookkeeping).
    with open("sortedsubreddits.txt", "r") as f:
        sub_dict = {name: [0, 0] for name in json.loads(f.read())}
    with open("filteredposts.txt", "r") as f:
        posts = json.loads(f.read())

    sid = SentimentIntensityAnalyzer()
    for post in posts:
        subreddit = post[1]
        if subreddit not in sub_dict:
            continue
        # Accumulate the VADER "compound" score directly. (The original picked
        # it via `sorted(ss)[:1]` — "compound" happens to sort first among the
        # keys "compound"/"neg"/"neu"/"pos".)
        sub_dict[subreddit][0] += sid.polarity_scores(post[0])["compound"]
        sub_dict[subreddit][1] += 1

    # Average per subreddit, truncated (floored) to 3 decimal places.
    # Fix: only a zero POST COUNT must be skipped (division by zero); the
    # original also skipped a compound total of exactly 0, which is a valid
    # average.
    averages = {}
    for name, (total, count) in sub_dict.items():
        if count == 0:
            continue
        averages[name] = math.floor(total / count * 1000) / 1000

    # Write results sorted ascending by average score. Fix: open the output
    # file once, instead of reopening it in append mode on every iteration.
    with open("results.txt", "a") as f:
        for name in sorted(averages, key=averages.get):
            line = f"{name}: {averages[name]}"
            f.write(line + "\n")
            print(line)


if __name__ == "__main__":
    main()
# (End of gist — GitHub comment-section prompt removed.)