# rer145/sentiments.R

## sentiments.R
library(dplyr)
library(tidytext)
library(gutenbergr)
library(ggplot2)

# Get our data: the text of Dracula (Project Gutenberg ebook #345)
dracula <- gutenberg_download(345)

# Remove the gutenberg_id field since we don't need it
dracula$gutenberg_id <- NULL

# Give each row a line number. The count is taken from the data rather than
# hard-coded, so the script keeps working if the downloaded text changes length.
dracula$line <- seq_len(nrow(dracula))

# Split each line of text into words (one row per word)
words <- dracula %>%
  unnest_tokens(word, text)

# Analyze the sentiment in chunks of 80 lines. Integer division (%/%) of the
# line number by the chunk size yields the chunk index for every word.
lines_per_chunk <- 80
words$grouping <- words$line %/% lines_per_chunk

# The tidytext package has different sentiment lexicons available; here we
# use bing (positive/negative labels) and afinn (numeric score per word).
bing <- get_sentiments("bing")
afinn <- get_sentiments("afinn")

# Older tidytext releases call the AFINN numeric column `score`, newer ones
# call it `value`. Normalize to `score` so the summary below works with both.
if (!"score" %in% names(afinn) && "value" %in% names(afinn)) {
  names(afinn)[names(afinn) == "value"] <- "score"
}

# Join each lexicon to the words; only words present in the lexicon are kept.
# The join key is spelled out instead of relying on the implicit natural join.
words_afinn <- inner_join(words, afinn, by = "word")
words_bing <- inner_join(words, bing, by = "word")

# With the bing lexicon, assign a score of +1 to positive and -1 to negative
# words. afinn already carries a numeric score for the severity of the word.
words_bing$score <- 1
negrows <- which(words_bing$sentiment == "negative")
words_bing$score[negrows] <- -1

# Summarize each chunk of text to see how positive or negative it is
sent_afinn <- words_afinn %>%
  group_by(grouping) %>%
  summarize(value = sum(score))

sent_bing <- words_bing %>%
  group_by(grouping) %>%
  summarize(value = sum(score))


# Optionally, plot both data frames to compare the scoring between them
# (orange = afinn, black = bing)
ggplot() +
  geom_line(data = sent_afinn, aes(x = grouping, y = value), color = "orange") +
  geom_line(data = sent_bing, aes(x = grouping, y = value), color = "black")
	library(dplyr)
	library(tidytext)
	library(gutenbergr)
	library(ggplot2)

	# NOTE(review): this tab-indented section repeats the analysis that appears
	# earlier in this file; confirm whether the repeat is intentional.

	# Get our data: the text of Dracula (Project Gutenberg ebook #345)
	dracula <- gutenberg_download(345)

	# Remove the gutenberg_id field since we don't need it
	dracula$gutenberg_id <- NULL

	# Give each row a line number. The count is taken from the data rather than
	# hard-coded, so the script keeps working if the downloaded text changes
	# length.
	dracula$line <- seq_len(nrow(dracula))

	# Split each line of text into words (one row per word)
	words <- dracula %>%
	  unnest_tokens(word, text)

	# Analyze the sentiment in chunks of 80 lines. Integer division (%/%) of the
	# line number by the chunk size yields the chunk index for every word.
	lines_per_chunk <- 80
	words$grouping <- words$line %/% lines_per_chunk

	# The tidytext package has different sentiment lexicons available; here we
	# use bing (positive/negative labels) and afinn (numeric score per word).
	bing <- get_sentiments("bing")
	afinn <- get_sentiments("afinn")

	# Older tidytext releases call the AFINN numeric column `score`, newer ones
	# call it `value`. Normalize to `score` so the summary below works with both.
	if (!"score" %in% names(afinn) && "value" %in% names(afinn)) {
	  names(afinn)[names(afinn) == "value"] <- "score"
	}

	# Join each lexicon to the words; only words present in the lexicon are kept.
	# The join key is spelled out instead of relying on the implicit natural join.
	words_afinn <- inner_join(words, afinn, by = "word")
	words_bing <- inner_join(words, bing, by = "word")

	# With the bing lexicon, assign a score of +1 to positive and -1 to negative
	# words. afinn already carries a numeric score for the severity of the word.
	words_bing$score <- 1
	negrows <- which(words_bing$sentiment == "negative")
	words_bing$score[negrows] <- -1

	# Summarize each chunk of text to see how positive or negative it is
	sent_afinn <- words_afinn %>%
	  group_by(grouping) %>%
	  summarize(value = sum(score))

	sent_bing <- words_bing %>%
	  group_by(grouping) %>%
	  summarize(value = sum(score))


	# Optionally, plot both data frames to compare the scoring between them
	# (orange = afinn, black = bing)
	ggplot() +
	  geom_line(data = sent_afinn, aes(x = grouping, y = value), color = "orange") +
	  geom_line(data = sent_bing, aes(x = grouping, y = value), color = "black")