Created October 7, 2021 23:52
-
-
Save dbrby/2d5f475555ce0d97bc0bb8a8ffb449f4 to your computer and use it in GitHub Desktop.
Average Words Per Sentence for Conservative Party Conference 2021 (Con21) Speeches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Average words per sentence for speeches published on conservatives.com
# for the 2021 party conference: scrape each page, count words and
# sentences with quanteda, and plot the ratio per speaker.

# install.packages("pacman")
# p_load() attaches the listed packages, installing any that are missing.
pacman::p_load(rvest, tidyverse, quanteda)

# Speech pages. `speakers` below is paired with `links` by position, so the
# two vectors must stay in the same order and have the same length.
links <- c(
  "https://www.conservatives.com/news/prime-minister-boris-johnson-speech-conference-2021",
  "https://www.conservatives.com/news/sajid-javids-speech-to-conservative-party-conference",
  "https://www.conservatives.com/news/taking-the-tough-decisions-to-cut-crime",
  "https://www.conservatives.com/news/we-will-make-sure-every-victim-sees-justice-done",
  "https://www.conservatives.com/news/rishi-sunak-speech-conference-2021",
  "https://www.conservatives.com/news/the-network-of-liberty",
  "https://www.conservatives.com/news/a-decent-tolerant-party-delivering-for-britain"
)
speakers <- c(
  "Boris Johnson", "Sajid Javid", "Priti Patel", "Dominic Raab",
  "Rishi Sunak", "Liz Truss", "Oliver Dowden"
)

# Download and parse every page (requires network access).
pgs <- lapply(links, read_html)

# For each page, join the text of all <blockquote> elements into one string.
# NOTE(review): presumably the speech body is marked up as <blockquote> on
# these pages -- confirm against the live HTML.
# vapply() guarantees exactly one string per page.
text <- vapply(pgs, function(pg) {
  pg %>%
    html_elements("blockquote") %>%
    html_text() %>%
    paste(collapse = " ")
}, character(1))

# A page without any <blockquote> text yields "", which would give 0 / 0 = NaN
# further down; surface that instead of letting it pass silently.
if (any(!nzchar(text))) {
  warning(
    "No <blockquote> text found for: ",
    paste(speakers[!nzchar(text)], collapse = ", "),
    call. = FALSE
  )
}

df <- tibble(links, speakers, text)

# Count words only: by default quanteda keeps punctuation marks as tokens,
# which would inflate a "words per sentence" figure.
df$ntok <- ntoken(tokens(df$text, remove_punct = TRUE))
df$nsen <- nsentence(df$text)
df$wps <- df$ntok / df$nsen

ggplot(df, aes(wps, speakers)) +
  geom_point() +
  ylab("Speakers") +
  xlab("Average Words per Sentence")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment