chrishanretty/towns_fund.R

## towns_fund.R
### Start by plotting stuff
library(tidyverse)
library(hrbrthemes)

## Download data from https://docs.google.com/spreadsheets/d/1inaI17l9Q-XADRNE_toXLp65sKu4_x4bXUFjZ3qjB2o/edit?usp=sharing
dat <- read.csv("wherever_i_saved_the_google_docs.csv")

### Estimate a simple model which we'll use when graphing
mod <- glm(I(Funded == "Yes") ~ Rank2 + ConWinner1,
           family = binomial,
           data = dat)

###
ann.df <- expand.grid(Rank2 = unique(dat$Rank2),
                      ConWinner1 = c(FALSE, TRUE))

ann.df$Funded <- plogis(predict(mod, newdata = ann.df))

dat$Funded <- as.numeric(dat$Funded == "Yes")

p1 <- ggplot(dat,
             aes(x = Rank2,
                 y = Funded,
                 colour = ConWinner1)) +
    geom_point(position = position_jitter(width = 0, height = 0.1),
               alpha = 4/5,
               size = 1.5) +
    scale_x_continuous("Ranking within region\n(Positive values indicate ranks above cut-off)") +
    scale_y_continuous("Funded", breaks = c(0, 1), labels = c("No", "Yes")) +
    scale_colour_discrete("Conservative-held area") +
    geom_line(data = ann.df, linetype = 2, size = 1.5) +
    labs(title = "What the data in fact looks like",
         subtitle = "53 of 61 areas are wholly or partly in Conservative seats",
         caption = "Data: NAO. Analysis: @chrishanretty") +
    theme_ipsum_rc() +
    theme(legend.position = "bottom")


### Counterfactual plot
dat$counterfactual <- as.numeric(dat$Rank2 >= 0)

p0 <- ggplot(dat,
             aes(x = Rank2,
                 y = counterfactual,
                 colour = ConWinner1)) +
    geom_point(position = position_jitter(width = 0, height = 0.1),
               alpha = 4/5,
               size = 2) +
    scale_x_continuous("Ranking within region\n(Positive values indicate ranks above cut-off)") +
    scale_y_continuous("Funded", breaks = c(0, 1), labels = c("No", "Yes")) +
    scale_colour_discrete("Conservative-held area") +
    labs(title = "What the data would have looked like\nhad ministers followed the civil service",
         subtitle = "38 out 60 funded areas are wholly or partly in Conservative seats",
         caption = "Data: NAO. Analysis: @chrishanretty") +
    geom_vline(xintercept = 0, linetype = 2, size = 1.5) +
    theme_ipsum_rc() +
    theme(legend.position = "bottom")

ggsave(p0, file = "../figure/p0.png")
ggsave(p1, file = "../figure/p1.png")

### Now model stuff
library(tidyverse)
library(hrbrthemes)
library(margins)
library(mgcv)
library(gt)
library(modelsummary)
library(broom)

### Estimate a simple model which we'll use when graphing
mod1 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2,
           family = binomial,
           data = dat)

summary(margins(mod1))

mod2 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 + log(Pop),
           family = binomial,
           data = dat)

mod3 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop),
           family = binomial,
           data = dat)

mod4 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop) + PriorityGrouping,
           family = binomial,
           data = dat)

mod5 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop) + PriorityGrouping + Region,
           family = binomial,
           data = dat)

msummary(list(mod1, mod2, mod3, mod4, mod5),
         coef_map = c("ConWinner1TRUE" = "Town in Conservative-held seat",
                      "Rank2" = "Places ahead of region cut-off",
                      "log(Pop)" = "log(Population in thousands)",
                      "PriorityGroupingMedium" = "Medium (v. low) priority group",
                      "RegionEast of England" = "East of England",
                      "RegionNorth East" = "North East",
                      "RegionNorth West" = "North West",
                      "RegionSouth East" = "South East",
                      "RegionSouth West" = "South West",
                      "RegionWest Midlands" = "W Midlands",
                      "RegionYorkshire and the Humber" = "Yorks and Humber",
                      "Rank2:log(Pop)" = "Places ahead of cut-off times log population"),
         output = "gt") %>%
    gtsave(filename = "tmp.html")

### What does our spline model look like?
summary(mod6 <- gam(I(Funded == "Yes") ~ s(ConMaj) + Rank2,
                    family = binomial,
            data = dat))

inseq <- seq(-50,
             max(dat$ConMaj, na.rm = TRUE),
             length.out = 100)

outy <- predict(mod6,
                newdata = data.frame(ConMaj = inseq, Rank2 = 0),
                se.fit = TRUE)

plot.df <- data.frame(ConMaj = inseq,
                      pr = plogis(outy$fit),
                      lo = plogis(outy$fit - 1.64 * outy$se.fit),
                      hi = plogis(outy$fit + 1.64 * outy$se.fit),
                      llo = plogis(outy$fit - 1.96 * outy$se.fit),
                      hhi = plogis(outy$fit + 1.96 * outy$se.fit))

p3 <- ggplot(plot.df, aes(x = ConMaj,
                    y = pr,
                    ymin = lo,
                    ymax = hi)) +
    geom_ribbon(alpha = 0.25) +
    geom_ribbon(aes(ymin = llo, ymax = hhi), alpha = 0.5) +
    geom_line() +
    scale_x_continuous("Conservative majority") +
    scale_y_continuous("Probability of receiving funding")+
    labs(title = "Probability of winning funding for towns on the cut-off") +
    theme_ipsum_rc()

ggsave(p3, file = "../figure/p3.png")

plot(inseq, plogis(outy))

### Examine all possible linear combinations
eg <- expand.grid(lapply(xr, function(x)c(FALSE, TRUE)))
### Exclude the combination where there's no other variable
eg <- eg[rowSums(eg) > 0,]
fs <- apply(eg, 1, function(l) paste0("funded.num ~ ",
                             paste0(xr[l], collapse = " + "),
                             " + ConWinner1"))

mods <- lapply(fs, function(f) glm(f, data = dat, family = binomial))
tmods <- lapply(mods, tidy) %>%
    bind_rows()
tmods <- tmods %>%
    filter(term == "ConWinner1TRUE")
	### Start by plotting stuff
	library(tidyverse)
	library(hrbrthemes)

	## Download data from https://docs.google.com/spreadsheets/d/1inaI17l9Q-XADRNE_toXLp65sKu4_x4bXUFjZ3qjB2o/edit?usp=sharing
	dat <- read.csv("wherever_i_saved_the_google_docs.csv")

	### Estimate a simple model which we'll use when graphing
	mod <- glm(I(Funded == "Yes") ~ Rank2 + ConWinner1,
	family = binomial,
	data = dat)

	###
	ann.df <- expand.grid(Rank2 = unique(dat$Rank2),
	ConWinner1 = c(FALSE, TRUE))

	ann.df$Funded <- plogis(predict(mod, newdata = ann.df))

	dat$Funded <- as.numeric(dat$Funded == "Yes")

	p1 <- ggplot(dat,
	aes(x = Rank2,
	y = Funded,
	colour = ConWinner1)) +
	geom_point(position = position_jitter(width = 0, height = 0.1),
	alpha = 4/5,
	size = 1.5) +
	scale_x_continuous("Ranking within region\n(Positive values indicate ranks above cut-off)") +
	scale_y_continuous("Funded", breaks = c(0, 1), labels = c("No", "Yes")) +
	scale_colour_discrete("Conservative-held area") +
	geom_line(data = ann.df, linetype = 2, size = 1.5) +
	labs(title = "What the data in fact looks like",
	subtitle = "53 of 61 areas are wholly or partly in Conservative seats",
	caption = "Data: NAO. Analysis: @chrishanretty") +
	theme_ipsum_rc() +
	theme(legend.position = "bottom")


	### Counterfactual plot
	dat$counterfactual <- as.numeric(dat$Rank2 >= 0)

	p0 <- ggplot(dat,
	aes(x = Rank2,
	y = counterfactual,
	colour = ConWinner1)) +
	geom_point(position = position_jitter(width = 0, height = 0.1),
	alpha = 4/5,
	size = 2) +
	scale_x_continuous("Ranking within region\n(Positive values indicate ranks above cut-off)") +
	scale_y_continuous("Funded", breaks = c(0, 1), labels = c("No", "Yes")) +
	scale_colour_discrete("Conservative-held area") +
	labs(title = "What the data would have looked like\nhad ministers followed the civil service",
	subtitle = "38 out 60 funded areas are wholly or partly in Conservative seats",
	caption = "Data: NAO. Analysis: @chrishanretty") +
	geom_vline(xintercept = 0, linetype = 2, size = 1.5) +
	theme_ipsum_rc() +
	theme(legend.position = "bottom")

	ggsave(p0, file = "../figure/p0.png")
	ggsave(p1, file = "../figure/p1.png")

	### Now model stuff
	library(tidyverse)
	library(hrbrthemes)
	library(margins)
	library(mgcv)
	library(gt)
	library(modelsummary)
	library(broom)

	### Estimate a simple model which we'll use when graphing
	mod1 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2,
	family = binomial,
	data = dat)

	summary(margins(mod1))

	mod2 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 + log(Pop),
	family = binomial,
	data = dat)

	mod3 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop),
	family = binomial,
	data = dat)

	mod4 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop) + PriorityGrouping,
	family = binomial,
	data = dat)

	mod5 <- glm(I(Funded == "Yes") ~ ConWinner1 + Rank2 * log(Pop) + PriorityGrouping + Region,
	family = binomial,
	data = dat)

	msummary(list(mod1, mod2, mod3, mod4, mod5),
	coef_map = c("ConWinner1TRUE" = "Town in Conservative-held seat",
	"Rank2" = "Places ahead of region cut-off",
	"log(Pop)" = "log(Population in thousands)",
	"PriorityGroupingMedium" = "Medium (v. low) priority group",
	"RegionEast of England" = "East of England",
	"RegionNorth East" = "North East",
	"RegionNorth West" = "North West",
	"RegionSouth East" = "South East",
	"RegionSouth West" = "South West",
	"RegionWest Midlands" = "W Midlands",
	"RegionYorkshire and the Humber" = "Yorks and Humber",
	"Rank2:log(Pop)" = "Places ahead of cut-off times log population"),
	output = "gt") %>%
	gtsave(filename = "tmp.html")

	### What does our spline model look like?
	summary(mod6 <- gam(I(Funded == "Yes") ~ s(ConMaj) + Rank2,
	family = binomial,
	data = dat))

	inseq <- seq(-50,
	max(dat$ConMaj, na.rm = TRUE),
	length.out = 100)

	outy <- predict(mod6,
	newdata = data.frame(ConMaj = inseq, Rank2 = 0),
	se.fit = TRUE)

	plot.df <- data.frame(ConMaj = inseq,
	pr = plogis(outy$fit),
	lo = plogis(outy$fit - 1.64 * outy$se.fit),
	hi = plogis(outy$fit + 1.64 * outy$se.fit),
	llo = plogis(outy$fit - 1.96 * outy$se.fit),
	hhi = plogis(outy$fit + 1.96 * outy$se.fit))

	p3 <- ggplot(plot.df, aes(x = ConMaj,
	y = pr,
	ymin = lo,
	ymax = hi)) +
	geom_ribbon(alpha = 0.25) +
	geom_ribbon(aes(ymin = llo, ymax = hhi), alpha = 0.5) +
	geom_line() +
	scale_x_continuous("Conservative majority") +
	scale_y_continuous("Probability of receiving funding")+
	labs(title = "Probability of winning funding for towns on the cut-off") +
	theme_ipsum_rc()

	ggsave(p3, file = "../figure/p3.png")

	plot(inseq, plogis(outy))

	### Examine all possible linear combinations
	eg <- expand.grid(lapply(xr, function(x)c(FALSE, TRUE)))
	### Exclude the combination where there's no other variable
	eg <- eg[rowSums(eg) > 0,]
	fs <- apply(eg, 1, function(l) paste0("funded.num ~ ",
	paste0(xr[l], collapse = " + "),
	" + ConWinner1"))

	mods <- lapply(fs, function(f) glm(f, data = dat, family = binomial))
	tmods <- lapply(mods, tidy) %>%
	bind_rows()
	tmods <- tmods %>%
	filter(term == "ConWinner1TRUE")