Instantly share code, notes, and snippets.

Embed
What would you like to do?
How to match word sentiments to words with R
library(dplyr)
library(tidytext)
library(gutenbergr)
library(ggplot2)
# Get our data: the text of Bram Stoker's "Dracula" (Project Gutenberg ID 345)
dracula <- gutenberg_download(345)
# Remove the gutenberg_id field since we don't need it
dracula$gutenberg_id <- NULL
# Give each row a line number. The count is taken from the data itself rather
# than hard-coded, because the downloaded text can differ in length between
# Project Gutenberg revisions and a fixed 1:N would then fail to fit.
dracula$line <- seq_len(nrow(dracula))
# Tokenize the text column: the result has one row per word
words <- unnest_tokens(dracula, word, text)
# Typically, sentiment is analyzed in chunks of about 80 lines. `%/%` is
# integer division, so every line with the same quotient shares a group id.
words <- mutate(words, grouping = line %/% 80)
# The tidytext package has different sentiment lexicons available; here we use
# bing (positive/negative labels) and afinn (a numeric severity per word).
bing <- get_sentiments("bing")
afinn <- get_sentiments("afinn")
# Newer tidytext releases name the AFINN severity column `value`, where older
# ones used `score`. Normalize it to `score` so the rest of the script works
# with either release (this is a no-op when the column is already `score`).
names(afinn)[names(afinn) == "value"] <- "score"
# Now create two data frames that attach the sentiment to each matching word.
# Naming the key explicitly avoids relying on dplyr guessing the join columns.
words_afinn <- inner_join(words, afinn, by = "word")
words_bing <- inner_join(words, bing, by = "word")
# bing only labels a word as positive or negative, so turn that label into a
# number: every row starts at +1 and the rows labelled negative become -1.
# (afinn needs no such step: it already includes a severity score per word.)
negrows <- which(words_bing$sentiment == "negative")
words_bing$score <- replace(rep(1, nrow(words_bing)), negrows, -1)
# Collapse every chunk of text into one net score, the sum of its word scores,
# to see how positive or negative that chunk is overall.
sent_afinn <- summarise(group_by(words_afinn, grouping), value = sum(score))
sent_bing <- summarise(group_by(words_bing, grouping), value = sum(score))
# Optionally, overlay both series on one chart to compare the two scorings:
# afinn is drawn in orange and bing in black. Both layers use the same x/y
# mapping, so it is declared once on the plot and inherited by each layer.
ggplot(mapping = aes(x = grouping, y = value)) +
  geom_line(data = sent_afinn, color = "orange") +
  geom_line(data = sent_bing, color = "black")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment