Skip to content

Instantly share code, notes, and snippets.

@joshkatz
Created January 31, 2018 15:49
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save joshkatz/c4fd0bdd0e2a06720b71d62d0b94c739 to your computer and use it in GitHub Desktop.
Save joshkatz/c4fd0bdd0e2a06720b71d62d0b94c739 to your computer and use it in GitHub Desktop.
State of the Union WPM chart
needs(rvest, tidyverse, zoo)
# Scrape the "minutes" and "words" State of the Union pages and combine them
# into one data frame keyed by president and date. The result has columns
# `president`, `date`, `duration` (raw text from the minutes page) and `words`.
df <- c('minutes', 'words') %>%
  map(function(var) {
    # `var` is substituted into the page name: sou_minutes.php / sou_words.php.
    doc <- var %>%
      sprintf(fmt = 'http://www.presidency.ucsb.edu/sou_%s.php') %>%
      read_html()
    doc %>%
      html_node('table tr:nth-of-type(2) table') %>%
      # NOTE(review): newer rvest releases fill ragged tables by default and
      # may warn about `fill`; kept so older installs behave the same.
      html_table(fill = TRUE) %>%
      select(X1:X4) %>%
      set_names(c('president', 'date', 'type', var)) %>%
      # Drops the first ten scraped rows (presumably page header rows -- confirm
      # against the live table).
      filter(row_number() > 10) %>%
      mutate(
        # NOTE(review): '%B' month-name parsing depends on the session locale.
        date = as.Date(date, format = '%B %d, %Y'),
        # `date` here is already the parsed value: rows whose date failed to
        # parse supply the president name, which is then carried forward onto
        # the rows that follow. NA_character_ keeps the column character even
        # when no such row exists.
        president = ifelse(is.na(date), president, NA_character_) %>%
          na_if('') %>%
          na.locf(na.rm = FALSE)
      ) %>%
      # Keep only rows that carry a real date.
      filter(!is.na(date)) %>%
      select(-type)
  }) %>%
  # Explicit keys: the same columns the implicit natural join would pick.
  reduce(left_join, by = c('president', 'date')) %>%
  rename(duration = minutes) %>%
  mutate(words = as.numeric(words))
# Turn the raw `duration` text into numeric `minutes`, derive words per minute,
# add a `last_name` label column, and make `president` an ordered-by-appearance
# factor over the rows that actually have a usable duration.
df <- df %>%
  mutate(
    minutes = coalesce(
      # Colon-separated values: pieces are combined as
      # 60 * first + second + third / 60. Anything that does not split into
      # three numbers yields NA here (as.numeric() may warn) and falls
      # through to the second parser.
      duration %>%
        str_split(':') %>%
        map(as.numeric) %>%
        map_dbl(~ 60 * .x[1] + .x[2] + .x[3] / 60),
      # Fallback: the first run of digits that follows a space.
      duration %>%
        str_extract(' ([0-9]+)') %>%
        as.numeric()
    ),
    wpm = words / minutes
  ) %>%
  # Last whitespace-separated token of the president's name.
  mutate(last_name = map_chr(str_split(president, ' '), last)) %>%
  # Filter BEFORE building the factor so that presidents with no usable
  # duration do not linger as unused levels; code that sizes things by
  # nlevels(df$president) then matches what is actually plotted.
  filter(!is.na(minutes)) %>%
  mutate(president = factor(president, levels = unique(president)))
# One colour per president level: two alternating grays for every level except
# the last, which gets red.
colors <- c(
  rep_len(gray(c(.25, .5)), nlevels(df$president) - 1),
  'red'
)
# Words-per-minute over time, one coloured line per president, with each
# president's last name printed at a fixed height under their run of points.
ggplot(df, aes(date, wpm, color = president)) +
  geom_line() +
  geom_point() +
  geom_text(
    aes(y = 68, label = last_name),
    size = 3.5,
    # The layer derives its own data from the plot data: one row per
    # president, positioned at the mean date of that president's addresses.
    data = function(d) {
      d %>%
        group_by(president, last_name) %>%
        summarize(date = mean(date)) %>%
        ungroup()
    }
  ) +
  scale_color_manual(values = colors) +
  ggtitle('State of the Union: Words per Minute') +
  theme(
    axis.title = element_blank(),
    # Spelled out in full: the abbreviated `legend.pos` only worked through
    # partial argument matching, which is not honoured for arguments that
    # follow `...` in a function signature.
    legend.position = 'none',
    plot.margin = margin(80, 40, 80, 40)
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment