Created
April 16, 2019 02:51
-
-
Save octoparse/fd9e0006794754edfbdaea86de5b1a51 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

import json
import re
from collections import Counter
# Characters removed before tokenizing: common punctuation, digits,
# possessive "'s", and any remaining apostrophes.
# NOTE(review): "'s" is removed wherever it occurs, including at the start of
# a quoted word (e.g. "'summer" -> "ummer"); kept as in the original script.
_STRIP_PATTERN = re.compile(r"""[,\.()":;!@#$%^&*\d]|'s|'""")


def tokenize(text):
    """Return the lower-cased words of *text* with punctuation/digits removed.

    Splitting is done on any run of whitespace, so consecutive spaces, tabs
    and newlines never produce empty-string "words" (the original
    ``split(' ')`` did, and those empty tokens ended up in the counts).
    """
    return _STRIP_PATTERN.sub('', text).lower().split()


def load_words(path):
    """Read the text file at *path* and return its tokens as a list."""
    with open(path) as f:
        return tokenize(f.read())


def count_words(words, positive, negative):
    """Count word frequencies overall and per sentiment class.

    words    -- iterable of tokens to count.
    positive -- container of positive words (a set gives O(1) lookups).
    negative -- container of negative words.

    Returns a ``(all_counts, positive_counts, negative_counts)`` tuple of
    ``Counter`` objects. A word found in both sentiment containers is
    counted as positive only, matching the original if/elif order.
    """
    all_counts = Counter()
    positive_counts = Counter()
    negative_counts = Counter()
    for word in words:
        all_counts[word] += 1
        if word in positive:
            positive_counts[word] += 1
        elif word in negative:
            negative_counts[word] += 1
    return all_counts, positive_counts, negative_counts


def write_counts(path, counts):
    """Write *counts* to *path* as ``word,count`` lines, most frequent first."""
    with open(path, 'w') as f:
        f.writelines(
            '%s,%s\n' % (word, count) for word, count in counts.most_common()
        )


def main():
    """Count words in data.txt and write the three frequency CSV files.

    Reads positive.txt, negative.txt and data.txt from the current
    directory; writes word_count.csv, word_positive.csv and
    word_negative.csv there.
    """
    # Sets make the per-word membership tests below O(1) instead of O(n).
    positive = set(load_words('positive.txt'))
    # test if cool is in the list
    print('cool is in the postive list: ', 'cool' in positive)

    negative = set(load_words('negative.txt'))
    # test if abrade is in the list
    print('abrade is in the negative list: ', 'abrade' in negative)
    # The original checked the positive list here, contradicting its message.
    print('cool is in the negative list: ', 'cool' in negative)

    # process the tweets
    all_counts, positive_counts, negative_counts = count_words(
        load_words('data.txt'), positive, negative
    )

    write_counts('word_count.csv', all_counts)
    write_counts('word_positive.csv', positive_counts)
    write_counts('word_negative.csv', negative_counts)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment