AnOnYmOus001100/Sentiment Classifier.py

## Sentiment Classifier.py
'''
Setinment Classifier:
We have provided some synthetic (fake, semi-randomly generated) twitter data in a csv file named project_twitter_data.csv which has the text of a tweet, the number of retweets of that tweet, and the number of replies to that tweet. We have also words that express positive sentiment and negative sentiment, in the files positive_words.txt and negative_words.txt.

Your task is to build a sentiment classifier, which will detect how positive or negative each tweet is. You will create a csv file, which contains columns for the Number of Retweets, Number of Replies, Positive Score (which is how many happy words are in the tweet), Negative Score (which is how many angry words are in the tweet), and the Net Score for each tweet. At the end, you upload the csv file to Excel or Google Sheets, and produce a graph of the Net Score vs Number of Retweets.

'''
punctuation_chars = ["'", '"', ",", ".", "!", ":", ";", '#', '@']
# lists of words to use
positive_words = []
with open("positive_words.txt") as pos_f:
    for lin in pos_f:
        if lin[0] != ';' and lin[0] != '\n':
            positive_words.append(lin.strip())


negative_words = []
with open("negative_words.txt") as pos_f:
    for lin in pos_f:
        if lin[0] != ';' and lin[0] != '\n':
            negative_words.append(lin.strip())
def get_pos(x):
    c=0
    words=x.split()
    for word in words:
        word= strip_punctuation(word)
        if word in positive_words:
            c=c+1
    return c

def get_neg(x):
    c=0
    words=x.split()
    for word in words:
        word= strip_punctuation(word)
        if word in negative_words:
            c=c+1
    return c

def strip_punctuation(s):
    for x in s:
        if x in punctuation_chars:
            s = s.replace(x, "")
    return s

outfile = open("resulting_data.csv","w")
outfile.write("Number of Retweets, Number of Replies, Positive Score, Negative Score, Net Score")
outfile.write('\n')


fileconnection = open("project_twitter_data.csv", 'r')

lines = fileconnection.readlines()
print(lines)
header = lines[0]
field_names = header.strip().split(',')
print(field_names)
for row in lines[1:]:

    vals = row.strip().split(',')
    row_string = '{},{},{},{},{}'.format(vals[1],vals[2],get_pos(vals[0]),get_neg(vals[0]),get_pos(vals[0])-get_neg(vals[0]))
    outfile.write(row_string)
    outfile.write('\n')


outfile.close()
	'''
	Setinment Classifier:
	We have provided some synthetic (fake, semi-randomly generated) twitter data in a csv file named project_twitter_data.csv which has the text of a tweet, the number of retweets of that tweet, and the number of replies to that tweet. We have also words that express positive sentiment and negative sentiment, in the files positive_words.txt and negative_words.txt.

	Your task is to build a sentiment classifier, which will detect how positive or negative each tweet is. You will create a csv file, which contains columns for the Number of Retweets, Number of Replies, Positive Score (which is how many happy words are in the tweet), Negative Score (which is how many angry words are in the tweet), and the Net Score for each tweet. At the end, you upload the csv file to Excel or Google Sheets, and produce a graph of the Net Score vs Number of Retweets.

	'''
	punctuation_chars = ["'", '"', ",", ".", "!", ":", ";", '#', '@']
	# lists of words to use
	positive_words = []
	with open("positive_words.txt") as pos_f:
	for lin in pos_f:
	if lin[0] != ';' and lin[0] != '\n':
	positive_words.append(lin.strip())


	negative_words = []
	with open("negative_words.txt") as pos_f:
	for lin in pos_f:
	if lin[0] != ';' and lin[0] != '\n':
	negative_words.append(lin.strip())
	def get_pos(x):
	c=0
	words=x.split()
	for word in words:
	word= strip_punctuation(word)
	if word in positive_words:
	c=c+1
	return c

	def get_neg(x):
	c=0
	words=x.split()
	for word in words:
	word= strip_punctuation(word)
	if word in negative_words:
	c=c+1
	return c

	def strip_punctuation(s):
	for x in s:
	if x in punctuation_chars:
	s = s.replace(x, "")
	return s

	outfile = open("resulting_data.csv","w")
	outfile.write("Number of Retweets, Number of Replies, Positive Score, Negative Score, Net Score")
	outfile.write('\n')


	fileconnection = open("project_twitter_data.csv", 'r')

	lines = fileconnection.readlines()
	print(lines)
	header = lines[0]
	field_names = header.strip().split(',')
	print(field_names)
	for row in lines[1:]:

	vals = row.strip().split(',')
	row_string = '{},{},{},{},{}'.format(vals[1],vals[2],get_pos(vals[0]),get_neg(vals[0]),get_pos(vals[0])-get_neg(vals[0]))
	outfile.write(row_string)
	outfile.write('\n')


	outfile.close()