Skip to content

Instantly share code, notes, and snippets.

@k5cents
Last active September 23, 2020 19:11
Show Gist options
  • Save k5cents/5dabffdb9e3ef45a0e5170170cf28db5 to your computer and use it in GitHub Desktop.
Save k5cents/5dabffdb9e3ef45a0e5170170cf28db5 to your computer and use it in GitHub Desktop.
Comparing past and future Presidential election turnout
library(patchwork)
library(tidyverse)
library(lubridate)
library(jsonlite)
library(predictr)
library(scales)
library(rvest)
# get vap estimate --------------------------------------------------------
# read from census bureau
pop_est <- read_csv(file = "https://bit.ly/3mKlL6I")
# find nationwide total
pop <- pop_est$POPEST18PLUS2019[pop_est$NAME == "United States"]/1e6
# read past turnout from wikipedia
turnout <- read_html("https://w.wiki/dAj")
turnout <- turnout %>%
html_node(".wikitable") %>%
html_table() %>%
as_tibble() %>%
set_names(c("election", "vap", "turnout", "prop")) %>%
filter(vap != "No data") %>%
type_convert(
na = "",
col_types = cols(
prop = col_number()
)
)
# plot market prices ------------------------------------------------------
# scrape predictit api
prices <- market_price(6882)
# shorten contract names
con_labs <- prices$contract %>%
str_replace("Fewer than ", "< ") %>%
str_replace("(160) mil. or more", "> \\1") %>%
str_remove_all("[a-z]") %>%
str_remove_all("\\.") %>%
str_squish()
# convert to percentages
pop_prop <- function(n, p = 255.2004) {
scales::percent(as.numeric(n)/pop, 0.1)
}
con_labs <- str_replace_all(con_labs, "\\d+", pop_prop)
# reassign as ordered factor
prices$contract <- factor(prices$contract, labels = con_labs)
# plot past turnout -------------------------------------------------------
# first tue after first mon in nov
# need date for 26a line
election_dates <- as.Date(character())
for (y in seq(1932, 2016, by = 4)) {
elec_day <- ymd(paste(y, 11, 1))
week_day <- wday(elec_day)
while (week_day != 2) {
elec_day <- elec_day + 1
week_day <- wday(elec_day)
}
election_dates <- election_dates %>%
append(elec_day + 1)
}
# plot turnout history
turnout_past <- turnout %>%
filter(election >= 1932) %>%
mutate(
date = election_dates,
prop = prop/100
) %>%
ggplot(aes(date, prop)) +
geom_vline(xintercept = as.Date("1971-07-01"), linetype = 2) +
geom_line(size = 1) +
geom_point(aes(color = prop), size = 5) +
scale_size_continuous(labels = percent, range = c(1, 10), guide = FALSE) +
scale_color_viridis_c(guide = FALSE, end = 0.75) +
scale_y_continuous(labels = scales::percent) +
scale_x_date(date_breaks = "8 years", labels = lubridate::year) +
coord_cartesian(ylim = c(0.475, 0.65)) +
geom_label(
mapping = aes(
x = as.Date("1982-01-01"),
y = 0.5875,
label = "26A lowers voting age"
)
) +
theme(
legend.position = "bottom",
axis.title.x = element_text(hjust = 1)
) +
labs(
title = "Voter Turnout in Past Presidential Elections",
subtitle = "Percentage of voting age population",
caption = "Source: US Census Bureau",
x = "Election Date",
y = "Turnout"
)
# geom with rectangles ----------------------------------------------------
# calculate rect shape
price_rect <- prices %>%
mutate(
date = as.Date("2020-11-03"),
range = str_extract_all(contract, "[0-9]{2}(?:\\.[0-9]+)?"),
low = as.double(map_chr(range, `[`, 1)),
high = as.double(map_chr(range, `[`, 2)),
middle = coalesce(low + (low - high), low, high)/100
) %>%
select(contract, low, high, last)
# fix high and low end values
price_rect$high[1] <- price_rect$low[1]
price_rect$low[1] <- price_rect$low[1] - 1.2
price_rect$high[12] <- price_rect$low[12] + 1.2
# create sideways histogram
turnout_rect <- price_rect %>%
mutate(across(2:3, `/`, 100)) %>%
mutate(mid = low + ((high - low)/2)) %>%
ggplot() +
geom_rect(
mapping = aes(
fill = high,
ymin = low,
ymax = high,
xmin = 0,
xmax = last
)
) +
scale_fill_viridis_c(guide = FALSE, end = 0.75) +
coord_cartesian(ylim = c(0.475, 0.65)) +
labs(
title = "Predicted Turnout",
subtitle = "Contract prices estimate probability",
caption = "Source: PredictIt/6882",
x = "Contract Price",
y = "Turnout Bracket"
) +
scale_x_continuous(labels = dollar) +
scale_y_continuous(position = "right") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.x = element_text(hjust = 1),
axis.title.y = element_text(margin = margin(l = 20))
)
# combine plots
turnout_both <-
turnout_past +
turnout_rect +
plot_layout(widths = c(5, 2))
ggsave(
filename = "~/Pictures/turnout_both.png",
plot = turnout_both,
height = 6,
width = 12,
dpi = "retina"
)
@k5cents
Copy link
Author

k5cents commented Sep 23, 2020

turnout_both

@k5cents
Copy link
Author

k5cents commented Sep 23, 2020

This graph was made in R using ggplot and my predictr package. The source code can be found on GitHub. Past turnout comes from this Wikipedia page. Future turnout comes from this prediction market.

The histogram of turnout predictions comes from the PredictIt.org prediction market, where traders buy and sell binary futures contracts with real money. In this market, each contract is tied to a range of turnout. As an outcome becomes more or less likely, demand for a given contract rises and falls and the equilibrium price adjusts to reflect an underlying probability.

It's important to note that the smallest) and largest brackets (fewer than 130 million and more than 160 million) are open ended. That's why they're more likely outcomes than the others nearby. Wasn't quite sure how to convey this without making the bars wider and make them look even more likely.

The turnout ranges were converted to a percentage of voting age population using the 2019 estimates from the Census Bureau.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment