# Created January 31, 2018 15:49
# State of the Union WPM chart
needs(rvest, tidyverse, zoo)
# Build `df`: for each of the two variable names ('minutes', 'words'), extract
# one table of (president, date, type, <variable>) rows, then left-join the two
# tables into a single data frame.
# NOTE(review): this statement looks incomplete in several places (empty
# sprintf() format, missing ifelse() condition, unterminated filter(!)).
# Restore the missing pieces before running; they cannot be recovered from
# what is visible here.
df <- c('minutes', 'words') %>%
map(function(var) {
# NOTE(review): the format string is empty and the pipe runs straight into
# `doc`; presumably a URL template containing the variable name and an
# HTML-reading step belong here -- TODO confirm.
doc <- var %>%
sprintf(fmt = '') %>%
doc %>%
# Pick a table nested inside the second `tr` of a table and parse it.
html_node('table tr:nth-of-type(2) table') %>%
html_table(fill = T) %>%
# Keep columns X1..X4 and name the fourth one after the current variable.
select(X1:X4) %>%
set_names(c('president', 'date', 'type', var)) %>%
# Drop the first ten rows.
filter(row_number() > 10) %>%
# Parse dates written like "January 31, 2018".
mutate(date = as.Date(date, '%B %d, %Y'),
# NOTE(review): the ifelse() test is missing. Whatever it was, rows failing it
# get NA, empty strings are also turned into NA, and na.locf() then fills each
# NA with the last preceding value (the positional F is presumably
# na.rm = FALSE -- confirm against zoo).
president = ifelse(, president, NA) %>%
na_if('') %>%
na.locf(F)) %>%
# NOTE(review): the filter condition after `!` is missing -- TODO restore.
filter(! %>%
}) %>%
# Join the per-variable tables; no `by` is given, so the join keys are left to
# left_join()'s default.
reduce(left_join) %>%
# The scraped 'minutes' column is kept under the name `duration`; `words` is
# converted to numeric.
rename(duration = minutes) %>%
mutate_at(vars(words), as.numeric)
# Derive numeric `minutes`, words per minute (`wpm`), each president's
# `last_name`, and turn `president` into a factor.
# NOTE(review): the coalesce() call below is never closed and the statement
# ends with a dangling `%>%`; part of the original appears to be missing.
df <- df %>%
# First choice for `minutes`: split `duration` on ':' into three numeric parts
# and combine them as 60 * part1 + part2 + part3 / 60 (i.e. treating the parts
# as hours, minutes, seconds).
mutate(minutes = coalesce(duration %>%
str_split(':') %>%
map(as.numeric) %>%
map_dbl(~ 60 * .x[1] + .x[2] + .x[3] / 60),
# Fallback: pull a space followed by digits out of `duration`.
# NOTE(review): the numeric conversion and the closing parentheses for
# coalesce() seem to be missing after this step -- TODO restore.
duration %>%
str_extract(' ([0-9]+)') %>%
wpm = words / minutes) %>%
# Last space-separated token of the president's name.
mutate(last_name = map_chr(str_split(president, ' '), last)) %>%
# Factor levels follow the order in which presidents first appear in the data.
mutate(president = factor(president, levels = unique(.$president))) %>%
# Color palette for the chart: two grays, alternated and recycled to one fewer
# entry than there are president levels.
# NOTE(review): the statement ends with a dangling `%>%`; presumably a final
# step supplying the color for the remaining level was lost -- TODO confirm.
colors <- c(gray(.25), gray(.5)) %>%
rep_len(nlevels(df$president) - 1) %>%
# Line-and-point chart of words per minute over time, colored per president
# (colors come from `colors`), with each president's last name printed at
# y = 68 and horizontally positioned at the mean date of that president's rows.
# Axis titles and the legend are hidden.
ggplot(df, aes(date, wpm, color = president)) +
  geom_line() +
  geom_point() +
  geom_text(
    aes(y = 68, label = last_name),
    size = 3.5,
    # One label row per (president, last_name), placed at the mean date.
    data = function(df) {
      df %>%
        group_by(president, last_name) %>%
        summarize(date = mean(date))
    }
  ) +
  scale_color_manual(values = colors) +
  ggtitle('State of the Union: Words per Minute') +
  theme(
    axis.title = element_blank(),
    # Spelled out in full: the abbreviated `legend.pos` relied on partial
    # argument matching, which is ambiguous as soon as theme() has more than
    # one formal starting with "legend.pos".
    legend.position = 'none',
    plot.margin = margin(80, 40, 80, 40)
  )
