Skip to content

Instantly share code, notes, and snippets.

@awhstin
Created December 16, 2016 16:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save awhstin/9fc77ad98528527bb9743e71dcbf1f94 to your computer and use it in GitHub Desktop.
Save awhstin/9fc77ad98528527bb9743e71dcbf1f94 to your computer and use it in GitHub Desktop.
library(rvest)
library(tidytext)
library(dplyr)
library(stringr)
library(tidyr)
library(ggplot2)
library(viridis)
library(ggthemes)
library(syuzhet)
# Get our data: scrape the speech index page and build `data`, one row per
# speech, with its relative href (`urls`), its title (`name`) and the absolute
# address to download (`link`).
index_page <- read_html("http://www.historyplace.com/speeches/previous.htm")
# Take the href and the label from the SAME anchor nodes. Selecting hrefs from
# "a" but labels from "font a" yields two vectors that need not line up, so a
# title could end up paired with another speech's link.
speech_anchors <- html_nodes(index_page, "font a")
urls <- html_attr(speech_anchors, "href")
name <- speech_anchors %>%
  html_text() %>%
  str_replace_all("[\n]", "")
# html_attr() gives NA for an anchor without an href; such an entry cannot be
# downloaded, so it is dropped before picking the speeches to analyse.
has_href <- !is.na(urls)
# Only the first 40 linked entries are kept.
rurls <- head(urls[has_href], 40)
rname <- head(name[has_href], 40)
data <- data.frame(urls = rurls, name = rname, stringsAsFactors = FALSE)
# NOTE(review): assumes the hrefs are relative to /speeches/ -- confirm
# against the live page.
data$link <- paste0("http://www.historyplace.com/speeches/", data$urls)
# Get the speeches: download every page listed in `data` and stack the
# results into `speeches`, one row per space-separated token.

# Download a single speech page and split the text of its <p> nodes on spaces.
#
# link: address of the speech page.
# name: title stored alongside every token of this speech.
#
# Returns a data frame with columns `word`, `line_number` (position of the
# token within the speech), `name` and `size` (token count of the speech), or
# NULL (with a warning) when the page cannot be read.
fetch_speech <- function(link, name) {
  paragraphs <- tryCatch(
    link %>%
      read_html() %>%
      html_nodes("p") %>%
      html_text(),
    error = function(e) {
      # One unreachable page should not abort the whole scrape.
      warning("Skipping ", link, ": ", conditionMessage(e), call. = FALSE)
      NULL
    }
  )
  if (is.null(paragraphs)) {
    return(NULL)
  }
  # as.character() keeps a zero-length column when the page has no <p> text
  # (unlist() of an empty list is NULL, which data.frame() would drop).
  speechwords <- as.character(unlist(strsplit(paragraphs, " ")))
  size <- length(speechwords)
  data.frame(
    word = speechwords,
    # seq_len() is empty for an empty speech, whereas 1:0 would be c(1, 0).
    line_number = seq_len(size),
    name = rep(name, size),
    size = rep(size, size),
    stringsAsFactors = FALSE
  )
}

# Build all pieces first and bind once instead of growing a data frame with
# rbind() on every iteration; bind_rows() ignores the NULL entries produced by
# skipped pages.
speeches <- bind_rows(
  lapply(seq_len(nrow(data)), function(i) {
    fetch_speech(data$link[i], data$name[i])
  })
)
# Big speeches: keep only rows belonging to speeches of more than 2000 tokens.
big.speeches <- speeches %>%
  filter(size > 2000)
# Tidy: tokenise the `word` column in place and remove stop words.
data("stop_words")
tidy.speeches <- big.speeches %>%
  unnest_tokens(word, word) %>%
  anti_join(stop_words)
# Print the remaining words ordered by frequency.
count(tidy.speeches, word, sort = TRUE)
# Collapse every run of whitespace in the speech titles to a single space.
tidy.speeches <- tidy.speeches %>%
  mutate(name = gsub("\\s+", " ", name))
# Sentiment: load the Bing lexicon (columns `word` and `sentiment`).
# get_sentiments() is used instead of filtering the combined `sentiments`
# table on its `lexicon` / `score` columns, which current tidytext releases no
# longer provide.
bing <- get_sentiments("bing")
# Join and sentiment: label each token, count positive and negative tokens per
# speech in chunks of 80 token positions (line_number %/% 80), and take the
# difference as the net sentiment of the chunk.
speech.sentiment <- tidy.speeches %>%
  # Explicit key: documents the join column and silences the "Joining" message.
  inner_join(bing, by = "word") %>%
  count(name, index = line_number %/% 80, sentiment) %>%
  # One column per sentiment label; chunks lacking a label get a count of 0.
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative)
# Plot: net sentiment along each speech, one facet per speech.
ggplot(speech.sentiment, aes(index, sentiment, color = name)) +
  # The legend is hidden because every facet strip already names its speech.
  geom_line(show.legend = FALSE) +
  # Speeches differ in length, so each facet gets its own x range.
  facet_wrap(~name, ncol = 2, scales = "free_x") +
  theme_minimal(base_size = 13) +
  labs(title = "Sentiment in Famous Speeches", y = "Sentiment") +
  scale_color_viridis(
    end = 0.75, discrete = TRUE, direction = -1, option = "C"
  ) +
  # `index` is numeric (line_number %/% 80), so the matching scale is the
  # continuous one. breaks = NULL keeps the x axis free of ticks and vertical
  # grid lines, in line with the blanked axis text below.
  scale_x_continuous(breaks = NULL, expand = c(0.02, 0)) +
  theme(
    strip.text = element_text(hjust = 0, face = "italic"),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment