Created
November 11, 2017 00:53
-
-
Save rer145/7abd5805bd9e0033330b38ec316f4c04 to your computer and use it in GitHub Desktop.
Using facet_wrap in ggplot to compare plots side by side.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the libraries used
library(gutenbergr)
library(tidytext)
library(dplyr)
library(ggplot2)

# Download text from Project Gutenberg (book id 345; the plot titles further
# down treat it as Dracula)
dracula <- gutenberg_download(345)

# Split the lines of text into words
dracula <- dracula %>%
  unnest_tokens(word, text)

# Join the words with the bing sentiment lexicon.
# The join key is spelled out so the match does not silently depend on which
# column names the two tables happen to share, and so dplyr does not print its
# "Joining, by = ..." message.
bing <- get_sentiments("bing")
dracula <- inner_join(dracula, bing, by = "word")
# Keep the 10 most used positive words, keeping the sentiment column.
# top_n() returns every row tied with the 10th-highest count, so a tie could
# put more than 10 bars under a "Top 10" title. Sorting by descending count
# (ties broken alphabetically, so the pick is reproducible) and taking the
# first 10 rows yields at most 10 words.
# The final arrange() restores ascending-count order, which the later factor
# conversion uses as the bar order.
words_pos <- dracula %>%
  filter(sentiment == "positive") %>%
  group_by(word) %>%
  summarize(count = n(), sentiment = first(sentiment)) %>%
  arrange(desc(count), word) %>%
  head(10) %>%
  arrange(count)
# Keep the 10 most used negative words, keeping the sentiment column.
# top_n() returns every row tied with the 10th-highest count, so a tie could
# put more than 10 bars under a "Top 10" title. Sorting by descending count
# (ties broken alphabetically, so the pick is reproducible) and taking the
# first 10 rows yields at most 10 words.
# The final arrange() restores ascending-count order, which the later factor
# conversion uses as the bar order.
words_neg <- dracula %>%
  filter(sentiment == "negative") %>%
  group_by(word) %>%
  summarize(count = n(), sentiment = first(sentiment)) %>%
  arrange(desc(count), word) %>%
  head(10) %>%
  arrange(count)
# Convert the word column to a factor whose levels follow the current row
# order, so the bars are drawn in that order rather than alphabetically
words_pos$word <- factor(words_pos$word, levels = words_pos$word)
words_neg$word <- factor(words_neg$word, levels = words_neg$word)

# Row bind the two data frames into one for plotting.
# rbind() is used because it merges the two sets of factor levels.
words <- rbind(words_pos, words_neg)
# Plot both, using the sentiment column as the grouping (one panel per
# sentiment). geom_col() is ggplot2's shorthand for
# geom_bar(stat = "identity"): bar length is taken straight from `count`.
ggplot(words, aes(x = word, y = count)) +
  geom_col() +
  xlab("Word") +
  ylab("Count") +
  coord_flip() +
  ggtitle("Top 10 Positive/Negative Words in Dracula") +
  facet_wrap(~sentiment, scales = "free_y")
# Plot both, changing colors for each grouping (e.g. sentiment).
# geom_col() is ggplot2's shorthand for geom_bar(stat = "identity").
# The manual colour values are named by sentiment so the mapping is explicit
# instead of depending on the positional order of the levels. Fill and outline
# deliberately use the two colours in opposite roles, as in the original.
ggplot(
  words,
  aes(x = word, y = count, fill = sentiment, color = sentiment)
) +
  geom_col() +
  xlab("Word") +
  ylab("Count") +
  coord_flip() +
  ggtitle("Top 10 Positive/Negative Words in Dracula") +
  facet_wrap(~sentiment, scales = "free_y") +
  scale_fill_manual(values = c(negative = "#000000", positive = "#ea6205")) +
  scale_color_manual(values = c(negative = "#ea6205", positive = "#000000"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment