Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Created April 13, 2024 13:13
Show Gist options
  • Save chrishanretty/9d6599a819a892103683104b118cf987 to your computer and use it in GitHub Desktop.
Save chrishanretty/9d6599a819a892103683104b118cf987 to your computer and use it in GitHub Desktop.
Separate multinomial regression of vote choice on lagged constituency vote shares for successive waves of the BES
### Load libraries
library(tidyverse)
library(haven) ## for Stata import
library(readxl)
library(marginaleffects) ## for easy predictions and contrasts
library(nnet) ## for nnet::multinom
library(furrr) ## to perform computations in parallel
### Set fewer workers if you have fewer cores
plan(multisession, workers = 12)
### Project assumes you have the following two files in your working directory
###
### - BES2019_W25_Panel_v25.1.dta
### - BES-2019-General-Election-results-file-v1.1.xlsx"
dat <- read_dta("BES2019_W25_Panel_v25.1.dta",
col_select = c(id,
starts_with("pconW"),
starts_with("generalElectionVote")))
## Bit of playing around to get labels working properly
dat <- dat |>
filter(!is.na(id)) |>
mutate_all(as_factor) |>
mutate_all(as.character)
### We need to reshape the data to move from wide to long format
### First longer so that each row is id/variable/{value+wave}
datl <- dat |>
pivot_longer(cols = c(starts_with("pcon"),
starts_with("generalElectionVote")),
names_to = c("variable", "wave"),
names_sep = "W") |>
mutate(wave = as.numeric(wave))
### then wider so that each row is id/wave
datw <- datl |>
pivot_wider(id_cols = c(id, wave),
names_from = variable,
values_from = value) |>
dplyr::select(id, wave, pcon, generalElectionVote)
### tidy up some of the vote stuff, collapsing minor parties
datw <- datw |>
mutate(vi = dplyr::recode(generalElectionVote,
"An independent candidate" = "Other",
"Brexit Party/Reform UK" = "BP/Reform/UKIP",
"British National Party (BNP)" = "Other",
"Change UK- The Independent Group" = "Other",
"Conservative" = "Con",
"Don't know" = "DK",
"Green Party" = "Green",
"I would/did not vote" = "DNV",
"Labour"= "Lab",
"Liberal Democrat" = "LDem",
"Other" = "Other",
"Plaid Cymru" = "PC",
"Scottish National Party (SNP)" = "SNP",
"United Kingdom Independence Party (UKIP)" = "BP/Reform/UKIP")) |>
dplyr::select(-generalElectionVote)
### We now want to collapse this to one row per constituency/wave combination
### with counts of the various dependent variables
datw <- datw |>
filter(!is.na(vi)) |>
filter(vi != "SNP") |>
filter(vi != "PC") |>
group_by(wave, pcon, vi) |>
summarize(count = n(),
.groups = "drop") |>
pivot_wider(names_from = vi,
values_from = count,
values_fill = 0L)
###
### Link with constituency information
aux <- read_xlsx("BES-2019-General-Election-results-file-v1.1.xlsx") |>
dplyr::select(ConstituencyName, ONSConstID, Country,
Con19, Lab19, LD19, SNP19, PC19, UKIP19, Green19, Brexit19, Other19,
Con17, Lab17, LD17, SNP17, PC17, UKIP17, Green17, Other17,
Con15, Lab15, LD15, SNP15, PC15, UKIP15, Green15, Other15,
Con10, Lab10, LD10, SNP10, PC10, UKIP10, Green10) |>
mutate(across(Con19:Green10, \(x) coalesce(x, 0.0)))
aux <- aux |>
pivot_longer(cols = Con19:Green10) |>
mutate(yr = gsub("[^0-9]", "", name),
party = gsub("[0-9]", "", name),
value = value / 100) |>
pivot_wider(names_from = party,
values_from = value,
id_cols =c(ConstituencyName, ONSConstID, Country, yr)) |>
mutate(earliest_wave = case_when(yr == "19" ~ 19,
yr == "17" ~ 13,
yr == "15" ~ 6,
yr == "10" ~ 1),
latest_wave = case_when(yr == "19" ~ 99,
yr == "17" ~ 19,
yr == "15" ~ 13,
yr == "10" ~ 6))
aux <- aux |>
dplyr::select(ConstituencyName,
ONSConstID,
Country,
Con_sh = Con,
Lab_sh = Lab,
LD_sh = LD,
UKIP_sh = UKIP,
Green_sh = Green,
Other_sh = Other,
earliest_wave,
latest_wave)
datw <- left_join(datw,
aux,
by = join_by(pcon == ConstituencyName,
wave >= earliest_wave,
wave < latest_wave))
### Now we're going to estimate a multinomial model where the vote
### shares are interacted with wave
### We're going to keep it simple, and restrict it to England
datw <- datw |>
filter(Country == "England")
model_func <- function(df) {
m <- multinom(cbind(DNV, Con, Lab, LDem, `BP/Reform/UKIP`,
Green, DK, Other) ~ Con_sh + Lab_sh + LD_sh + Green_sh,
data = df,
trace = FALSE)
}
datn <- datw |>
group_by(wave) |>
nest() |>
mutate(model = future_map(data, model_func))
comparison_func <- function(model) {
### For each model, work out the comparison setting LD_sh
avg_comparisons(model,
variables = list(LD_sh = \(x) data.frame(low = x, high = x + 1/100))) |>
as.data.frame() |>
filter(group == "LDem")
}
datn <- datn |>
mutate(obj = future_map(model, comparison_func))
plot_df <- datn |>
dplyr::select(wave, obj) |>
unnest(cols = obj)
### Convert waves into dates
plot_df <- plot_df |>
mutate(wave_date = dplyr::recode(wave,
`25` = "2023-05-01",
`24` = "2022-12-01",
`23` = "2022-05-01",
`22` = "2021-11-01",
`21` = "2021-05-01",
`20` = "2020-06-01",
`19` = "2019-12-01",
`18` = "2019-11-15",
`17` = "2019-11-01",
`16` = "2019-05-01",
`15` = "2019-03-01",
`14` = "2018-05-01",
`13` = "2017-06-01",
`12` = "2017-05-01",
`11` = "2017-04-01",
`10` = "2016-11-01",
`9` = "2016-06-01",
`8` = "2016-05-01",
`7` = "2016-04-01",
`6` = "2015-05-01",
`5` = "2015-04-01",
`4` = "2015-03-01",
`3` = "2014-09-01",
`2` = "2014-05-01",
`1` = "2014-02-01"),
wave_date = as.Date(wave_date))
p <- ggplot(plot_df, aes(x = wave_date,
y = estimate,
ymin = conf.low,
ymax = conf.high)) +
geom_vline(xintercept = c(as.Date("2015-05-07"),
as.Date("2017-06-08"),
as.Date("2019-12-12")),
linetype = 2,
colour = "darkgrey") +
geom_hline(yintercept = .01,
linetype = 2,
colour = "darkgrey") +
annotate(geom = "text",
x = as.Date("2014-01-01"),
y = 0.01,
hjust = 0,
vjust = 0,
label = "Model reproduces uniform national swing",
colour = "darkgrey") +
scale_x_date("Approximate date of fieldwork for each wave") +
scale_y_continuous("Effect of a percentage point change\nin lagged Lib Dem vote share",
labels = scales::percent,
breaks = c(0, .0025, 0.005, 0.0075, 0.01)) +
geom_pointrange(fill = "goldenrod",
colour = colorspace::darken("goldenrod"), shape = 24) +
theme_bw()
ggsave(p,
file = "consty_fx.png",
width = 9, height = 6)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment