Created
September 24, 2020 19:44
-
-
Save chrishanretty/22cc18d066e4b7b6d4c81806e6bd1b45 to your computer and use it in GitHub Desktop.
Towns fund analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Start by plotting stuff | |
library(tidyverse) | |
library(hrbrthemes) | |
## Download data from https://docs.google.com/spreadsheets/d/1inaI17l9Q-XADRNE_toXLp65sKu4_x4bXUFjZ3qjB2o/edit?usp=sharing | |
dat <- read.csv("wherever_i_saved_the_google_docs.csv") | |
### Estimate a simple model which we'll use when graphing | |
mod <- glm(I(Funded == "Yes") ~ Rank2 + ConWinner1, | |
family = binomial, | |
data = dat) | |
### | |
ann.df <- expand.grid(Rank2 = unique(dat$Rank2), | |
ConWinner1 = c(FALSE, TRUE)) | |
ann.df$Funded <- plogis(predict(mod, newdata = ann.df)) | |
dat$Funded <- as.numeric(dat$Funded == "Yes") | |
p1 <- ggplot(dat, | |
aes(x = Rank2, | |
y = Funded, | |
colour = ConWinner1)) + | |
geom_point(position = position_jitter(width = 0, height = 0.1), | |
alpha = 4/5, | |
size = 1.5) + | |
scale_x_continuous("Ranking within region\n(Positive values indicate ranks above cut-off)") + | |
scale_y_continuous("Funded", breaks = c(0, 1), labels = c("No", "Yes")) + | |
scale_colour_discrete("Conservative-held area") + | |
geom_line(data = ann.df, linetype = 2, size = 1.5) + | |
labs(title = "What the data in fact looks like", | |
subtitle = "53 of 61 areas are wholly or partly in Conservative seats", | |
caption = "Data: NAO. Analysis: @chrishanretty") + | |
theme_ipsum_rc() + | |
theme(legend.position = "bottom") | |
### Counterfactual plot | |
dat$counterfactual <- as.numeric(dat$Rank2 >= 0) | |
p0 <- ggplot(dat, | |
aes(x = Rank2, | |
y = counterfactual, | |
colour = ConWinner1)) + | |
geom_point(position = position_jitter(width = 0, height = 0.1), | |
alpha = 4/5, | |
size = 2) + | |
scale_x_continuous("Ranking within region\n(Positive values indicate ranks above cut-off)") + | |
scale_y_continuous("Funded", breaks = c(0, 1), labels = c("No", "Yes")) + | |
scale_colour_discrete("Conservative-held area") + | |
labs(title = "What the data would have looked like\nhad ministers followed the civil service", | |
subtitle = "38 out 60 funded areas are wholly or partly in Conservative seats", | |
caption = "Data: NAO. Analysis: @chrishanretty") + | |
geom_vline(xintercept = 0, linetype = 2, size = 1.5) + | |
theme_ipsum_rc() + | |
theme(legend.position = "bottom") | |
ggsave(p0, file = "../figure/p0.png") | |
ggsave(p1, file = "../figure/p1.png") | |
### Now model stuff | |
library(tidyverse) | |
library(hrbrthemes) | |
library(margins) | |
library(mgcv) | |
library(gt) | |
library(modelsummary) | |
library(broom) | |
### Estimate a simple model which we'll use when graphing | |
mod1 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2, | |
family = binomial, | |
data = dat) | |
summary(margins(mod1)) | |
mod2 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 + log(Pop), | |
family = binomial, | |
data = dat) | |
mod3 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop), | |
family = binomial, | |
data = dat) | |
mod4 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop) + PriorityGrouping, | |
family = binomial, | |
data = dat) | |
mod5 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop) + PriorityGrouping + Region, | |
family = binomial, | |
data = dat) | |
msummary(list(mod1, mod2, mod3, mod4, mod5), | |
coef_map = c("ConWinner1TRUE" = "Town in Conservative-held seat", | |
"Rank2" = "Places ahead of region cut-off", | |
"log(Pop)" = "log(Population in thousands)", | |
"PriorityGroupingMedium" = "Medium (v. low) priority group", | |
"RegionEast of England" = "East of England", | |
"RegionNorth East" = "North East", | |
"RegionNorth West" = "North West", | |
"RegionSouth East" = "South East", | |
"RegionSouth West" = "South West", | |
"RegionWest Midlands" = "W Midlands", | |
"RegionYorkshire and the Humber" = "Yorks and Humber", | |
"Rank2:log(Pop)" = "Places ahead of cut-off times log population"), | |
output = "gt") %>% | |
gtsave(filename = "tmp.html") | |
### What does our spline model look like? | |
summary(mod6 <- gam(I(Funded == "Yes") ~ s(ConMaj) + Rank2, | |
family = binomial, | |
data = dat)) | |
inseq <- seq(-50, | |
max(dat$ConMaj, na.rm = TRUE), | |
length.out = 100) | |
outy <- predict(mod6, | |
newdata = data.frame(ConMaj = inseq, Rank2 = 0), | |
se.fit = TRUE) | |
plot.df <- data.frame(ConMaj = inseq, | |
pr = plogis(outy$fit), | |
lo = plogis(outy$fit - 1.64 * outy$se.fit), | |
hi = plogis(outy$fit + 1.64 * outy$se.fit), | |
llo = plogis(outy$fit - 1.96 * outy$se.fit), | |
hhi = plogis(outy$fit + 1.96 * outy$se.fit)) | |
p3 <- ggplot(plot.df, aes(x = ConMaj, | |
y = pr, | |
ymin = lo, | |
ymax = hi)) + | |
geom_ribbon(alpha = 0.25) + | |
geom_ribbon(aes(ymin = llo, ymax = hhi), alpha = 0.5) + | |
geom_line() + | |
scale_x_continuous("Conservative majority") + | |
scale_y_continuous("Probability of receiving funding")+ | |
labs(title = "Probability of winning funding for towns on the cut-off") + | |
theme_ipsum_rc() | |
ggsave(p3, file = "../figure/p3.png") | |
plot(inseq, plogis(outy)) | |
### Examine all possible linear combinations | |
eg <- expand.grid(lapply(xr, function(x)c(FALSE, TRUE))) | |
### Exclude the combination where there's no other variable | |
eg <- eg[rowSums(eg) > 0,] | |
fs <- apply(eg, 1, function(l) paste0("funded.num ~ ", | |
paste0(xr[l], collapse = " + "), | |
" + ConWinner1")) | |
mods <- lapply(fs, function(f) glm(f, data = dat, family = binomial)) | |
tmods <- lapply(mods, tidy) %>% | |
bind_rows() | |
tmods <- tmods %>% | |
filter(term == "ConWinner1TRUE") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment