Skip to content

Instantly share code, notes, and snippets.

@octoparse
Created April 16, 2019 02:51
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save octoparse/fd9e0006794754edfbdaea86de5b1a51 to your computer and use it in GitHub Desktop.
Save octoparse/fd9e0006794754edfbdaea86de5b1a51 to your computer and use it in GitHub Desktop.
import re
import json
import os
from collections import Counter

# Word-frequency report for a tweet dump.
#
# Reads a positive-word list, a negative-word list and the tweet text, then
# writes three "word,count" CSV files sorted by descending count: every word,
# the words found in the positive list, and the words found in the negative
# list but not in the positive one.

# Stripped before tokenizing: the listed punctuation marks, digits, the
# possessive "'s" and any remaining apostrophe.  A raw string is used so that
# "\." and "\d" reach the regex engine without invalid-escape warnings.
PUNCTUATION_RE = re.compile(r'[,\.()":;!@#$%^&*\d]|\'s|\'')


def tokenize(text):
    """Return the lowercased words of *text* with punctuation/digits removed.

    Splitting on any run of whitespace (rather than on a single space) keeps
    empty strings out of the result, so blank "words" are never counted.
    """
    return PUNCTUATION_RE.sub('', text).lower().split()


def load_words(path):
    """Read the text file at *path* and return its tokens (see `tokenize`)."""
    with open(path) as f:
        return tokenize(f.read())


def count_words(words, positive, negative):
    """Count word frequencies overall and per sentiment list.

    Args:
        words: iterable of tokens to count.
        positive: iterable of positive words.
        negative: iterable of negative words.

    Returns:
        A ``(totals, positive_counts, negative_counts)`` tuple of Counters.
        A word present in both lists is counted as positive only.
    """
    # Sets give O(1) membership tests inside the loop.
    positive = set(positive)
    negative = set(negative)
    totals = Counter()
    positive_counts = Counter()
    negative_counts = Counter()
    for word in words:
        totals[word] += 1
        if word in positive:
            positive_counts[word] += 1
        elif word in negative:
            negative_counts[word] += 1
    return totals, positive_counts, negative_counts


def write_counts(path, counts):
    """Write *counts* to *path* as ``word,count`` lines, highest count first."""
    with open(path, 'w') as f:
        f.writelines('%s,%s\n' % pair for pair in counts.most_common())


def _show(label, flag):
    # A single pre-formatted argument prints identically whether ``print`` is
    # a statement (Python 2) or a function (Python 3).
    print('%s %s' % (label, flag))


def main(positive_path='positive.txt', negative_path='negative.txt',
         data_path='data.txt', output_dir=''):
    """Build the word lists, count the tweet words and write the CSV reports.

    Args:
        positive_path: file containing the positive words.
        negative_path: file containing the negative words.
        data_path: file containing the tweet text to analyse.
        output_dir: directory receiving ``word_count.csv``,
            ``word_positive.csv`` and ``word_negative.csv``; the default
            writes them to the current working directory.
    """
    # save the positive words into a set
    positive = set(load_words(positive_path))
    # test if cool is in the positive list
    _show('cool is in the postive list: ', 'cool' in positive)

    # save the negative words into a set
    negative = set(load_words(negative_path))
    # test if abrade is in the negative list
    _show('abrade is in the negative list: ', 'abrade' in negative)
    # test if cool is in the negative list (must check the negative words,
    # not the positive ones)
    _show('cool is in the negative list: ', 'cool' in negative)

    # process the tweets
    totals, positive_counts, negative_counts = count_words(
        load_words(data_path), positive, negative)

    write_counts(os.path.join(output_dir, 'word_count.csv'), totals)
    write_counts(os.path.join(output_dir, 'word_positive.csv'),
                 positive_counts)
    write_counts(os.path.join(output_dir, 'word_negative.csv'),
                 negative_counts)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment