Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Created September 24, 2020 19:44
Show Gist options
  • Save chrishanretty/22cc18d066e4b7b6d4c81806e6bd1b45 to your computer and use it in GitHub Desktop.
Save chrishanretty/22cc18d066e4b7b6d4c81806e6bd1b45 to your computer and use it in GitHub Desktop.
Towns fund analysis
### Start by plotting stuff
library(tidyverse)
library(hrbrthemes)
## Download data from https://docs.google.com/spreadsheets/d/1inaI17l9Q-XADRNE_toXLp65sKu4_x4bXUFjZ3qjB2o/edit?usp=sharing
dat <- read.csv("wherever_i_saved_the_google_docs.csv")
### Estimate a simple model which we'll use when graphing
mod <- glm(I(Funded == "Yes") ~ Rank2 + ConWinner1,
family = binomial,
data = dat)
###
ann.df <- expand.grid(Rank2 = unique(dat$Rank2),
ConWinner1 = c(FALSE, TRUE))
ann.df$Funded <- plogis(predict(mod, newdata = ann.df))
dat$Funded <- as.numeric(dat$Funded == "Yes")
p1 <- ggplot(dat,
aes(x = Rank2,
y = Funded,
colour = ConWinner1)) +
geom_point(position = position_jitter(width = 0, height = 0.1),
alpha = 4/5,
size = 1.5) +
scale_x_continuous("Ranking within region\n(Positive values indicate ranks above cut-off)") +
scale_y_continuous("Funded", breaks = c(0, 1), labels = c("No", "Yes")) +
scale_colour_discrete("Conservative-held area") +
geom_line(data = ann.df, linetype = 2, size = 1.5) +
labs(title = "What the data in fact looks like",
subtitle = "53 of 61 areas are wholly or partly in Conservative seats",
caption = "Data: NAO. Analysis: @chrishanretty") +
theme_ipsum_rc() +
theme(legend.position = "bottom")
### Counterfactual plot
dat$counterfactual <- as.numeric(dat$Rank2 >= 0)
p0 <- ggplot(dat,
aes(x = Rank2,
y = counterfactual,
colour = ConWinner1)) +
geom_point(position = position_jitter(width = 0, height = 0.1),
alpha = 4/5,
size = 2) +
scale_x_continuous("Ranking within region\n(Positive values indicate ranks above cut-off)") +
scale_y_continuous("Funded", breaks = c(0, 1), labels = c("No", "Yes")) +
scale_colour_discrete("Conservative-held area") +
labs(title = "What the data would have looked like\nhad ministers followed the civil service",
subtitle = "38 out 60 funded areas are wholly or partly in Conservative seats",
caption = "Data: NAO. Analysis: @chrishanretty") +
geom_vline(xintercept = 0, linetype = 2, size = 1.5) +
theme_ipsum_rc() +
theme(legend.position = "bottom")
ggsave(p0, file = "../figure/p0.png")
ggsave(p1, file = "../figure/p1.png")
### Now model stuff
library(tidyverse)
library(hrbrthemes)
library(margins)
library(mgcv)
library(gt)
library(modelsummary)
library(broom)
### Estimate a simple model which we'll use when graphing
mod1 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2,
family = binomial,
data = dat)
summary(margins(mod1))
mod2 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 + log(Pop),
family = binomial,
data = dat)
mod3 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop),
family = binomial,
data = dat)
mod4 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop) + PriorityGrouping,
family = binomial,
data = dat)
mod5 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop) + PriorityGrouping + Region,
family = binomial,
data = dat)
msummary(list(mod1, mod2, mod3, mod4, mod5),
coef_map = c("ConWinner1TRUE" = "Town in Conservative-held seat",
"Rank2" = "Places ahead of region cut-off",
"log(Pop)" = "log(Population in thousands)",
"PriorityGroupingMedium" = "Medium (v. low) priority group",
"RegionEast of England" = "East of England",
"RegionNorth East" = "North East",
"RegionNorth West" = "North West",
"RegionSouth East" = "South East",
"RegionSouth West" = "South West",
"RegionWest Midlands" = "W Midlands",
"RegionYorkshire and the Humber" = "Yorks and Humber",
"Rank2:log(Pop)" = "Places ahead of cut-off times log population"),
output = "gt") %>%
gtsave(filename = "tmp.html")
### What does our spline model look like?
summary(mod6 <- gam(I(Funded == "Yes") ~ s(ConMaj) + Rank2,
family = binomial,
data = dat))
inseq <- seq(-50,
max(dat$ConMaj, na.rm = TRUE),
length.out = 100)
outy <- predict(mod6,
newdata = data.frame(ConMaj = inseq, Rank2 = 0),
se.fit = TRUE)
plot.df <- data.frame(ConMaj = inseq,
pr = plogis(outy$fit),
lo = plogis(outy$fit - 1.64 * outy$se.fit),
hi = plogis(outy$fit + 1.64 * outy$se.fit),
llo = plogis(outy$fit - 1.96 * outy$se.fit),
hhi = plogis(outy$fit + 1.96 * outy$se.fit))
p3 <- ggplot(plot.df, aes(x = ConMaj,
y = pr,
ymin = lo,
ymax = hi)) +
geom_ribbon(alpha = 0.25) +
geom_ribbon(aes(ymin = llo, ymax = hhi), alpha = 0.5) +
geom_line() +
scale_x_continuous("Conservative majority") +
scale_y_continuous("Probability of receiving funding")+
labs(title = "Probability of winning funding for towns on the cut-off") +
theme_ipsum_rc()
ggsave(p3, file = "../figure/p3.png")
plot(inseq, plogis(outy))
### Examine all possible linear combinations
eg <- expand.grid(lapply(xr, function(x)c(FALSE, TRUE)))
### Exclude the combination where there's no other variable
eg <- eg[rowSums(eg) > 0,]
fs <- apply(eg, 1, function(l) paste0("funded.num ~ ",
paste0(xr[l], collapse = " + "),
" + ConWinner1"))
mods <- lapply(fs, function(f) glm(f, data = dat, family = binomial))
tmods <- lapply(mods, tidy) %>%
bind_rows()
tmods <- tmods %>%
filter(term == "ConWinner1TRUE")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment