Last active
February 2, 2019 21:11
-
-
Save katerinabc/4923f99e24f9f593cd5ce07ef573248e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load two sentiment lexicons, "bing" and "nrc". They are two different
# dictionaries; each assigns an emotional value to individual words.
# NOTE(review): `nrc` is loaded here but not used in this section -- confirm it
# is needed elsewhere before removing it.
bing <- get_sentiments("bing")
nrc <- get_sentiments("nrc")
# Split the mission text into one word per row, drop stop words, and count how
# often each word occurs within each country (most frequent first).
mission_words <- ms %>%
  unnest_tokens(word, mission) %>%
  # Join on `word` explicitly so no other shared column can change the match.
  anti_join(stop_words, by = "word") %>%
  count(country, word, sort = TRUE) %>%
  ungroup()

# Total number of remaining (non-stop-word) words per country. This is the
# denominator used when the sentiment scores are normalized.
total_words <- mission_words %>%
  group_by(country) %>%
  summarize(total = sum(n))
# Attach the "bing" lexicon to the tokenized data. In `ms_df` each word is on
# its own row; the inner join keeps only words that appear in the lexicon and
# adds their sentiment label.
# NOTE(review): `ms_df` is not defined in this section, while the per-country
# totals come from `mission_words` (stop words removed) -- confirm both were
# tokenized from the same text.
ms_sent <- ms_df %>%
  inner_join(get_sentiments("bing"), by = c("word" = "word")) %>%
  # Number of matched words per country and sentiment label.
  count(country, sentiment) %>%
  # One column per sentiment label; countries missing a label get 0.
  spread(sentiment, n, fill = 0) %>%
  # Net sentiment: positive word count minus negative word count.
  mutate(sentiment = positive - negative) %>%
  # Add the per-country word totals, then express each score as a share of
  # the country's total word count so countries with longer texts are
  # comparable.
  left_join(total_words, by = "country") %>%
  mutate(
    neg_st = negative / total,
    pos_st = positive / total,
    sentiment_normalized = sentiment / total
  )
# Plot the normalized sentiment score per country as a bar chart.
sentiment_plot <- ggplot(ms_sent, aes(country, sentiment_normalized)) +
  geom_col() +
  labs(
    title = "Normalized Sentiment Score of Words in mission Text",
    x = "Country",
    y = "Normalized Sentiment Score"
  )

# Display the plot (auto-printed at top level, as before).
sentiment_plot

# Save the plot into the directory given by `mypath`. The plot object is passed
# explicitly so the saved figure does not depend on whichever plot happened to
# be created last.
ggsave("Sentiment_analysis_mission_text.png", plot = sentiment_plot, path = mypath)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment