Created
April 13, 2024 13:13
-
-
Save chrishanretty/9d6599a819a892103683104b118cf987 to your computer and use it in GitHub Desktop.
Separate multinomial regression of vote choice on lagged constituency vote shares for successive waves of the BES
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Load libraries | |
library(tidyverse) | |
library(haven) ## for Stata import | |
library(readxl) | |
library(marginaleffects) ## for easy predictions and contrasts | |
library(nnet) ## for nnet::multinom | |
library(furrr) ## to perform computations in parallel | |
### Set fewer workers if you have fewer cores | |
plan(multisession, workers = 12) | |
### Project assumes you have the following two files in your working directory | |
### | |
### - BES2019_W25_Panel_v25.1.dta | |
### - BES-2019-General-Election-results-file-v1.1.xlsx" | |
dat <- read_dta("BES2019_W25_Panel_v25.1.dta", | |
col_select = c(id, | |
starts_with("pconW"), | |
starts_with("generalElectionVote"))) | |
## Bit of playing around to get labels working properly | |
dat <- dat |> | |
filter(!is.na(id)) |> | |
mutate_all(as_factor) |> | |
mutate_all(as.character) | |
### We need to reshape the data to move from wide to long format | |
### First longer so that each row is id/variable/{value+wave} | |
datl <- dat |> | |
pivot_longer(cols = c(starts_with("pcon"), | |
starts_with("generalElectionVote")), | |
names_to = c("variable", "wave"), | |
names_sep = "W") |> | |
mutate(wave = as.numeric(wave)) | |
### then wider so that each row is id/wave | |
datw <- datl |> | |
pivot_wider(id_cols = c(id, wave), | |
names_from = variable, | |
values_from = value) |> | |
dplyr::select(id, wave, pcon, generalElectionVote) | |
### tidy up some of the vote stuff, collapsing minor parties | |
datw <- datw |> | |
mutate(vi = dplyr::recode(generalElectionVote, | |
"An independent candidate" = "Other", | |
"Brexit Party/Reform UK" = "BP/Reform/UKIP", | |
"British National Party (BNP)" = "Other", | |
"Change UK- The Independent Group" = "Other", | |
"Conservative" = "Con", | |
"Don't know" = "DK", | |
"Green Party" = "Green", | |
"I would/did not vote" = "DNV", | |
"Labour"= "Lab", | |
"Liberal Democrat" = "LDem", | |
"Other" = "Other", | |
"Plaid Cymru" = "PC", | |
"Scottish National Party (SNP)" = "SNP", | |
"United Kingdom Independence Party (UKIP)" = "BP/Reform/UKIP")) |> | |
dplyr::select(-generalElectionVote) | |
### We now want to collapse this to one row per constituency/wave combination | |
### with counts of the various dependent variables | |
datw <- datw |> | |
filter(!is.na(vi)) |> | |
filter(vi != "SNP") |> | |
filter(vi != "PC") |> | |
group_by(wave, pcon, vi) |> | |
summarize(count = n(), | |
.groups = "drop") |> | |
pivot_wider(names_from = vi, | |
values_from = count, | |
values_fill = 0L) | |
### | |
### Link with constituency information | |
aux <- read_xlsx("BES-2019-General-Election-results-file-v1.1.xlsx") |> | |
dplyr::select(ConstituencyName, ONSConstID, Country, | |
Con19, Lab19, LD19, SNP19, PC19, UKIP19, Green19, Brexit19, Other19, | |
Con17, Lab17, LD17, SNP17, PC17, UKIP17, Green17, Other17, | |
Con15, Lab15, LD15, SNP15, PC15, UKIP15, Green15, Other15, | |
Con10, Lab10, LD10, SNP10, PC10, UKIP10, Green10) |> | |
mutate(across(Con19:Green10, \(x) coalesce(x, 0.0))) | |
aux <- aux |> | |
pivot_longer(cols = Con19:Green10) |> | |
mutate(yr = gsub("[^0-9]", "", name), | |
party = gsub("[0-9]", "", name), | |
value = value / 100) |> | |
pivot_wider(names_from = party, | |
values_from = value, | |
id_cols =c(ConstituencyName, ONSConstID, Country, yr)) |> | |
mutate(earliest_wave = case_when(yr == "19" ~ 19, | |
yr == "17" ~ 13, | |
yr == "15" ~ 6, | |
yr == "10" ~ 1), | |
latest_wave = case_when(yr == "19" ~ 99, | |
yr == "17" ~ 19, | |
yr == "15" ~ 13, | |
yr == "10" ~ 6)) | |
aux <- aux |> | |
dplyr::select(ConstituencyName, | |
ONSConstID, | |
Country, | |
Con_sh = Con, | |
Lab_sh = Lab, | |
LD_sh = LD, | |
UKIP_sh = UKIP, | |
Green_sh = Green, | |
Other_sh = Other, | |
earliest_wave, | |
latest_wave) | |
datw <- left_join(datw, | |
aux, | |
by = join_by(pcon == ConstituencyName, | |
wave >= earliest_wave, | |
wave < latest_wave)) | |
### Now we're going to estimate a multinomial model where the vote | |
### shares are interacted with wave | |
### We're going to keep it simple, and restrict it to England | |
datw <- datw |> | |
filter(Country == "England") | |
model_func <- function(df) { | |
m <- multinom(cbind(DNV, Con, Lab, LDem, `BP/Reform/UKIP`, | |
Green, DK, Other) ~ Con_sh + Lab_sh + LD_sh + Green_sh, | |
data = df, | |
trace = FALSE) | |
} | |
datn <- datw |> | |
group_by(wave) |> | |
nest() |> | |
mutate(model = future_map(data, model_func)) | |
comparison_func <- function(model) { | |
### For each model, work out the comparison setting LD_sh | |
avg_comparisons(model, | |
variables = list(LD_sh = \(x) data.frame(low = x, high = x + 1/100))) |> | |
as.data.frame() |> | |
filter(group == "LDem") | |
} | |
datn <- datn |> | |
mutate(obj = future_map(model, comparison_func)) | |
plot_df <- datn |> | |
dplyr::select(wave, obj) |> | |
unnest(cols = obj) | |
### Convert waves into dates | |
plot_df <- plot_df |> | |
mutate(wave_date = dplyr::recode(wave, | |
`25` = "2023-05-01", | |
`24` = "2022-12-01", | |
`23` = "2022-05-01", | |
`22` = "2021-11-01", | |
`21` = "2021-05-01", | |
`20` = "2020-06-01", | |
`19` = "2019-12-01", | |
`18` = "2019-11-15", | |
`17` = "2019-11-01", | |
`16` = "2019-05-01", | |
`15` = "2019-03-01", | |
`14` = "2018-05-01", | |
`13` = "2017-06-01", | |
`12` = "2017-05-01", | |
`11` = "2017-04-01", | |
`10` = "2016-11-01", | |
`9` = "2016-06-01", | |
`8` = "2016-05-01", | |
`7` = "2016-04-01", | |
`6` = "2015-05-01", | |
`5` = "2015-04-01", | |
`4` = "2015-03-01", | |
`3` = "2014-09-01", | |
`2` = "2014-05-01", | |
`1` = "2014-02-01"), | |
wave_date = as.Date(wave_date)) | |
p <- ggplot(plot_df, aes(x = wave_date, | |
y = estimate, | |
ymin = conf.low, | |
ymax = conf.high)) + | |
geom_vline(xintercept = c(as.Date("2015-05-07"), | |
as.Date("2017-06-08"), | |
as.Date("2019-12-12")), | |
linetype = 2, | |
colour = "darkgrey") + | |
geom_hline(yintercept = .01, | |
linetype = 2, | |
colour = "darkgrey") + | |
annotate(geom = "text", | |
x = as.Date("2014-01-01"), | |
y = 0.01, | |
hjust = 0, | |
vjust = 0, | |
label = "Model reproduces uniform national swing", | |
colour = "darkgrey") + | |
scale_x_date("Approximate date of fieldwork for each wave") + | |
scale_y_continuous("Effect of a percentage point change\nin lagged Lib Dem vote share", | |
labels = scales::percent, | |
breaks = c(0, .0025, 0.005, 0.0075, 0.01)) + | |
geom_pointrange(fill = "goldenrod", | |
colour = colorspace::darken("goldenrod"), shape = 24) + | |
theme_bw() | |
ggsave(p, | |
file = "consty_fx.png", | |
width = 9, height = 6) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment