Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Last active July 18, 2020 15:24
Show Gist options
  • Save chrishanretty/4a20ade91726e706edfce06ea127e8ef to your computer and use it in GitHub Desktop.
Save chrishanretty/4a20ade91726e706edfce06ea127e8ef to your computer and use it in GitHub Desktop.
library(tidyverse)
library(rdrobust)
library(hrbrthemes)
library(cowplot)
library(Cairo)
dat <- readRDS("tidy_data.rds")
dat <- dat %>%
mutate(count_satisfied.20 = round(pct_satisfied.20 * response.20),
change_1920 = pct_satisfied.20 - pct_satisfied.19,
Institution = sub("University of London, ", "", Institution)) %>%
mutate(turn.20 = response.20 / sample_size.20,
turn.19 = response.19 / sample_size.19)
successfully_reballotted <- c("Imperial College London",
"SOAS",
"Birkbeck College",
"Keele University",
"King's College London")
dat$Strike <- dat$Turnout_Strike >= 50
dat$reballotted <- dat$Institution %in% successfully_reballotted
### ##############################################################
### Lots of pretty pictures
### ##############################################################
scatter1 <- ggplot(dat, aes(x = pct_satisfied.19,
y = pct_satisfied.20,
colour = Strike,
shape = Strike)) +
geom_text(data = subset(dat, pct_satisfied.19 < .78 |
pct_satisfied.20 < .70 |
pct_satisfied.19 > .95),
aes(label = Institution),
nudge_y = 1/100) +
scale_x_continuous("Percent satisfied overall in 2019",
labels = scales::percent) +
scale_y_continuous("Percent satisfied overall in 2020",
labels = scales::percent) +
geom_point(size = 4) +
geom_smooth(method = "lm", se = FALSE) +
scale_colour_discrete(name = "Institution went on strike", position = "top") +
scale_shape_discrete(guide = FALSE) +
theme_ipsum() +
labs(title = "No clear negative effect of strike on 2020 NSS scores",
subtitle = "Striking institutions plotted in turquoise triangles")
CairoPNG(file = "scatter1.png",
width = 1400, height = 900)
scatter1
dev.off()
### (1) Plots for levels
base_plot <- ggplot(dat, aes(x = Turnout_Strike,
y = pct_satisfied.20,
shape = Strike,
colour = Strike)) +
geom_point() +
geom_vline(xintercept = 50) +
scale_colour_discrete(name = "Institution went on strike",
guide = "none") +
scale_shape_discrete(name = "Institution went on strike") +
geom_text(data = subset(dat, pct_satisfied.20 < .725),
aes(label = Institution),
nudge_y = 1/100) +
scale_x_continuous("Max. turnout in strike ballots") +
scale_y_continuous("Pct. satisfied in 2020 NSS",
labels = scales::percent) +
labs(title = "Effect of running variable (ballot turnout) on 2020 satisfaction") +
theme_ipsum_rc() +
theme(legend.position = "bottom")
levels_all_lin <- base_plot +
geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
labs(title = "Linear fit, all observations")
levels_all_poly2 <- base_plot +
geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = TRUE) +
labs(title = "Polynomial fit with degree 2, all observations")
levels_bw5_lin <- base_plot +
geom_smooth(data = subset(dat,
Turnout_Strike >= 45 &
Turnout_Strike <= 55),
method = "lm", formula = y ~ x, se = TRUE) +
labs(title = "Linear fit, observations within 5 points of threshold")
levels_bw5_poly2 <- base_plot +
geom_smooth(data = subset(dat,
Turnout_Strike >= 45 &
Turnout_Strike <= 55),
method = "lm", formula = y ~ poly(x, 2), se = TRUE) +
labs(title = "Polynomial fit with degree 2, observations within 5 points of threshold")
levels_bw2_lin <- base_plot +
geom_smooth(data = subset(dat,
Turnout_Strike >= 48 &
Turnout_Strike <= 52),
method = "lm", formula = y ~ x, se = TRUE) +
labs(title = "Linear fit, observations within 2 points of threshold")
levels_bw2_poly2 <- base_plot +
geom_smooth(data = subset(dat,
Turnout_Strike >= 48 &
Turnout_Strike <= 52),
method = "lm", formula = y ~ poly(x, 2), se = TRUE) +
labs(title = "Polynomial fit with degree 2, observations within 2 points of threshold")
levels_plots <- plot_grid(levels_all_lin, levels_all_poly2,
levels_bw5_lin, levels_bw5_poly2,
levels_bw2_lin, levels_bw2_poly2,
ncol = 2)
### (2) Plots for changes
base_plot <- ggplot(dat, aes(x = Turnout_Strike,
y = pct_satisfied.20 - pct_satisfied.19,
shape = Strike,
colour = Strike)) +
geom_point() +
geom_vline(xintercept = 50) +
geom_hline(yintercept = 0) +
scale_colour_discrete(name = "Institution went on strike",
guide = "none") +
scale_shape_discrete(name = "Institution went on strike") +
scale_x_continuous("Max. turnout in strike ballots") +
scale_y_continuous("Change in satisfaction",
labels = scales::percent) +
labs(title = "Effect of running variable (ballot turnout) on 2020 satisfaction") +
theme_ipsum_rc() +
theme(legend.position = "bottom")
changes_all_lin <- base_plot +
geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
labs(title = "Linear fit, all observations")
changes_all_poly2 <- base_plot +
geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = TRUE) +
labs(title = "Polynomial fit with degree 2, all observations")
changes_bw5_lin <- base_plot +
geom_smooth(data = subset(dat,
Turnout_Strike >= 45 &
Turnout_Strike <= 55),
method = "lm", formula = y ~ x, se = TRUE) +
labs(title = "Linear fit, observations within 5 points of threshold")
changes_bw5_poly2 <- base_plot +
geom_smooth(data = subset(dat,
Turnout_Strike >= 45 &
Turnout_Strike <= 55),
method = "lm", formula = y ~ poly(x, 2), se = TRUE) +
labs(title = "Polynomial fit with degree 2, observations within 5 points of threshold")
changes_bw2_lin <- base_plot +
geom_smooth(data = subset(dat,
Turnout_Strike >= 48 &
Turnout_Strike <= 52),
method = "lm", formula = y ~ x, se = TRUE) +
labs(title = "Linear fit, observations within 2 points of threshold")
changes_bw2_poly2 <- base_plot +
geom_smooth(data = subset(dat,
Turnout_Strike >= 48 &
Turnout_Strike <= 52),
method = "lm", formula = y ~ poly(x, 2), se = TRUE) +
labs(title = "Polynomial fit with degree 2, observations within 2 points of threshold")
changes_plots <- plot_grid(changes_all_lin, changes_all_poly2,
changes_bw5_lin, changes_bw5_poly2,
changes_bw2_lin, changes_bw2_poly2,
ncol = 2,
labels = "auto")
CairoPNG(file = "changes_plots.png",
width = 1400,
height = 900)
changes_plots
dev.off()
### RDDs
### Options include data used (all or strict)
### kernel choice (triangular, epi, uniform)
### and (for rdestimate only) bandwidth (2, 4, 5)
opts <- expand.grid(kernel = c("tri", "epa", "uni"),
data = c("all", "strict")) %>%
as_tibble()
opts$mods <- apply(opts, 1, function(x, data) {
kernopt <- x["kernel"]
datopt <- x["data"]
if (datopt == "all") {
df <- data
} else {
df <- subset(data, !reballotted)
}
mod <- rdrobust(y = df$pct_satisfied.20 - df$pct_satisfied.19,
x = df$Turnout_Strike,
c = 49.999,
kernel = kernopt)
return(mod)
}, data = dat)
### Get things out of this
opts <- opts %>%
mutate(bw = map(mods, function(x)x$bws[1,1]),
pe = map(mods, function(x)x$coef[1,1]),
obs_left = map(mods, function(x)x$N_h[1]),
obs_right = map(mods, function(x)x$N_h[2]),
lo = map(mods, function(x)x$ci[3,1]),
hi = map(mods, function(x)x$ci[3,2])) %>%
dplyr::select(bw, pe, lo, hi, kernel, data, obs_left, obs_right) %>%
unnest(cols = c(bw, pe, lo, hi, obs_left, obs_right)) %>%
mutate(data = dplyr::recode(data,
"all" = "All observations",
"strict" = "Excluding late strikers"),
Kernel = dplyr::recode(kernel,
"tri" = "Triangular",
"epa" = "Epanechnikov",
"uni" = "Uniform"),
obs = paste0("(", obs_left, "/", obs_right, ")"))
p1 <- ggplot(opts, aes(x = bw, y = pe,
shape = Kernel,
colour = Kernel,
ymin = lo, ymax = hi)) +
scale_x_continuous("Bandwidth") +
scale_y_continuous("Effect", labels = scales::percent) +
geom_pointrange(size = 1.75) +
geom_text(aes(label = obs), nudge_x = 1/10) +
geom_hline(yintercept = 0) +
facet_wrap(~data) +
theme_ipsum_rc() +
theme(legend.position = "bottom")
CairoPNG(file = "rdds.png", width = 1400, height = 900)
p1
dev.off()
### ##############################################################
### Diff-in-diff
### ##############################################################
### Need to reshape a little
dat_20 <- dat %>%
dplyr::select(UKPRN,
gpa = gpa.20,
pct_satisfied = pct_satisfied.20,
Turnout_Strike) %>%
mutate(Year = 2020)
dat_19 <- dat %>%
dplyr::select(UKPRN,
gpa = gpa.19,
pct_satisfied = pct_satisfied.19,
Turnout_Strike) %>%
mutate(Year = 2019)
long_df <- rbind(dat_19, dat_20)
summary(did_pct <- lm(pct_satisfied ~ I(Turnout_Strike > 50) * I(Year == 2020),
data = long_df))
### ############################################
### Auxiliary analyses on response rates
### ############################################
rd_turn <- rdrobust(y = dat$turn.20,
x = dat$Turnout_Strike,
c = 50,
covs = dat$turn.19)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment