# rer145/positive_dracula.R

## positive_dracula.R
library(gutenbergr)
library(tidytext)
library(dplyr)
library(ggplot2)

# Find the most frequent positive words in Dracula and chart them.
# The steps run once, top to bottom: download, tokenize, tag sentiment,
# count, plot.

# Download Dracula from Project Gutenberg (ebook ID 345).
dracula <- gutenberg_download(345)

# Load the Bing sentiment lexicon through tidytext.
bing <- get_sentiments("bing")

# Split each line of text into one word per row, then keep only the words the
# lexicon marks as positive. Filtering the lexicon before the join yields the
# same rows as joining on everything and discarding negatives afterwards; the
# explicit `by` avoids relying on dplyr guessing the key column.
dracula <- dracula %>%
  unnest_tokens(word, text) %>%
  inner_join(filter(bing, sentiment == "positive"), by = "word")

# Tally each word and keep the ten highest counts. Words tied at the cutoff
# are all kept, so the result can hold more than ten rows.
# Turning `word` into a factor whose levels follow the sorted rows makes
# ggplot2 draw the bars in descending-count order instead of alphabetically.
words <- dracula %>%
  count(word, name = "count") %>%
  slice_max(count, n = 10) %>%
  arrange(desc(count)) %>%
  mutate(word = factor(word, levels = word))

# Bar chart of the counts; geom_col() plots the y values as given.
ggplot(words, aes(x = word, y = count)) +
  geom_col() +
  labs(x = "Word", y = "Count", title = "Top 10 Positive words in Dracula")