# katerinabc/sentiment_analysis.R

## sentiment_analysis.R
# Load two sentiment lexicons, 'bing' and 'nrc'. Each lexicon assigns an
# emotional value to individual words.
# NOTE(review): `nrc` is not referenced anywhere else in this section.
bing <- get_sentiments("bing")
nrc <- get_sentiments("nrc")

# Split the `mission` text of `ms` into one word per row, remove stop words,
# and count how often each word occurs per country.
mission_words <- ms %>%
  unnest_tokens(word, mission) %>%
  # Explicit join key: silences the "Joining, by = ..." message and prevents
  # an accidental match on any other column name shared with `stop_words`.
  anti_join(stop_words, by = "word") %>%
  # TRUE instead of T: T is an ordinary variable that can be reassigned.
  count(country, word, sort = TRUE) %>%
  ungroup()

# Total number of counted (non-stop-word) tokens per country.
total_words <- mission_words %>%
  group_by(country) %>%
  summarize(total = sum(n))

# Score each country with the 'bing' lexicon. In ms_df each word is on its own
# row (ms_df is defined outside this section). The inner join keeps only the
# words found in the lexicon and attaches their sentiment label.
ms_sent <- ms_df %>%
  # Reuse the lexicon already loaded into `bing` rather than fetching it again.
  inner_join(bing, by = "word") %>%
  count(country, sentiment) %>%
  # One row per country, one column per sentiment label; a label that does not
  # occur for a country is filled with 0.
  spread(sentiment, n, fill = 0) %>%
  # Net sentiment per country.
  # NOTE(review): needs both a `positive` and a `negative` column, i.e. each
  # label must occur at least once somewhere in the data.
  mutate(sentiment = positive - negative) %>%
  # Attach the per-country word totals and express the counts as shares of
  # that total so countries with longer texts are comparable.
  left_join(total_words, by = "country") %>%
  mutate(
    neg_st = negative / total,
    pos_st = positive / total,
    sentiment_normalized = sentiment / total
  )
# Bar chart of the normalized sentiment score per country.
sentiment_plot <- ggplot(ms_sent, aes(country, sentiment_normalized)) +
  geom_col() +
  labs(
    title = "Normalized Sentiment Score of Words in mission Text",
    x = "Country",
    y = "Normalized Sentiment Score"
  )
# Bare name so the plot is still auto-printed when run at top level.
sentiment_plot

# Write the chart into the directory given by `mypath` (defined outside this
# section). The plot is passed explicitly so the saved file does not depend on
# whichever plot happens to be the most recent one.
ggsave(
  "Sentiment_analysis_mission_text.png",
  plot = sentiment_plot,
  path = mypath
)
	# NOTE(review): the statements from here on repeat an analysis that already
	# appears earlier in this file; running them recomputes and overwrites the
	# same objects. Consider removing one copy.
	#
	# Load two sentiment lexicons, 'bing' and 'nrc'. Each lexicon assigns an
	# emotional value to individual words.
	# NOTE(review): `nrc` is not referenced anywhere else in this section.
	bing <- get_sentiments("bing")
	nrc <- get_sentiments("nrc")

	# Split the `mission` text of `ms` into one word per row, remove stop words,
	# and count how often each word occurs per country.
	mission_words <- ms %>%
	  unnest_tokens(word, mission) %>%
	  # Explicit join key: silences the "Joining, by = ..." message and prevents
	  # an accidental match on any other column name shared with `stop_words`.
	  anti_join(stop_words, by = "word") %>%
	  # TRUE instead of T: T is an ordinary variable that can be reassigned.
	  count(country, word, sort = TRUE) %>%
	  ungroup()

	# Total number of counted (non-stop-word) tokens per country.
	total_words <- mission_words %>%
	  group_by(country) %>%
	  summarize(total = sum(n))

	# Score each country with the 'bing' lexicon. In ms_df each word is on its
	# own row (ms_df is defined outside this section). The inner join keeps only
	# the words found in the lexicon and attaches their sentiment label.
	ms_sent <- ms_df %>%
	  # Reuse the lexicon already loaded into `bing` rather than fetching it
	  # again.
	  inner_join(bing, by = "word") %>%
	  count(country, sentiment) %>%
	  # One row per country, one column per sentiment label; a label that does
	  # not occur for a country is filled with 0.
	  spread(sentiment, n, fill = 0) %>%
	  # Net sentiment per country.
	  # NOTE(review): needs both a `positive` and a `negative` column, i.e. each
	  # label must occur at least once somewhere in the data.
	  mutate(sentiment = positive - negative) %>%
	  # Attach the per-country word totals and express the counts as shares of
	  # that total so countries with longer texts are comparable.
	  left_join(total_words, by = "country") %>%
	  mutate(
	    neg_st = negative / total,
	    pos_st = positive / total,
	    sentiment_normalized = sentiment / total
	  )
	# Bar chart of the normalized sentiment score per country.
	sentiment_plot <- ggplot(ms_sent, aes(country, sentiment_normalized)) +
	  geom_col() +
	  labs(
	    title = "Normalized Sentiment Score of Words in mission Text",
	    x = "Country",
	    y = "Normalized Sentiment Score"
	  )
	# Bare name so the plot is still auto-printed when run at top level.
	sentiment_plot

	# Write the chart into the directory given by `mypath` (defined outside this
	# section). The plot is passed explicitly so the saved file does not depend
	# on whichever plot happens to be the most recent one.
	ggsave(
	  "Sentiment_analysis_mission_text.png",
	  plot = sentiment_plot,
	  path = mypath
	)