chrishanretty/changing_fx_of_consty_context.R

## changing_fx_of_consty_context.R
### Load libraries
library(tidyverse)
library(haven) ## for Stata import
library(readxl)
library(marginaleffects) ## for easy predictions and contrasts
library(nnet) ## for nnet::multinom
library(furrr) ## to perform computations in parallel

### Set fewer workers if you have fewer cores
plan(multisession, workers = 12)

### Project assumes you have the following two files in your working directory
###
###  - BES2019_W25_Panel_v25.1.dta
###  - BES-2019-General-Election-results-file-v1.1.xlsx"
dat <- read_dta("BES2019_W25_Panel_v25.1.dta",
                col_select = c(id,
                               starts_with("pconW"),
                               starts_with("generalElectionVote")))

## Bit of playing around to get labels working properly
dat <- dat |>
    filter(!is.na(id)) |>
    mutate_all(as_factor) |>
    mutate_all(as.character)

### We need to reshape the data to move from wide to long format
### First longer so that each row is id/variable/{value+wave}
datl <- dat |>
    pivot_longer(cols = c(starts_with("pcon"),
                          starts_with("generalElectionVote")),
                 names_to = c("variable", "wave"),
                 names_sep = "W") |>
    mutate(wave = as.numeric(wave))

### then wider so that each row is id/wave
datw <- datl |>
    pivot_wider(id_cols = c(id, wave),
                names_from = variable,
                values_from = value) |>
    dplyr::select(id, wave, pcon, generalElectionVote)

### tidy up some of the vote stuff, collapsing minor parties
datw <- datw |>
    mutate(vi = dplyr::recode(generalElectionVote,
                              "An independent candidate" = "Other",
                              "Brexit Party/Reform UK" = "BP/Reform/UKIP",
                              "British National Party (BNP)" = "Other",
                              "Change UK- The Independent Group" = "Other",
                              "Conservative" = "Con",
                              "Don't know" = "DK",
                              "Green Party" = "Green",
                              "I would/did not vote" = "DNV",
                              "Labour"= "Lab",
                              "Liberal Democrat" = "LDem",
                              "Other" = "Other",
                              "Plaid Cymru" = "PC",
                              "Scottish National Party (SNP)" = "SNP",
                              "United Kingdom Independence Party (UKIP)" = "BP/Reform/UKIP")) |>
    dplyr::select(-generalElectionVote)

### We now want to collapse this to one row per constituency/wave combination
### with counts of the various dependent variables
datw <- datw |>
    filter(!is.na(vi)) |>
    filter(vi != "SNP") |>
    filter(vi != "PC") |>
    group_by(wave, pcon, vi) |>
    summarize(count = n(),
              .groups = "drop") |>
    pivot_wider(names_from = vi,
                values_from = count,
                values_fill = 0L)

###
### Link with constituency information
aux <- read_xlsx("BES-2019-General-Election-results-file-v1.1.xlsx") |>
    dplyr::select(ConstituencyName, ONSConstID, Country,
                  Con19, Lab19, LD19, SNP19, PC19, UKIP19, Green19, Brexit19, Other19,
                  Con17, Lab17, LD17, SNP17, PC17, UKIP17, Green17, Other17,
                  Con15, Lab15, LD15, SNP15, PC15, UKIP15, Green15, Other15,
                  Con10, Lab10, LD10, SNP10, PC10, UKIP10, Green10) |>
    mutate(across(Con19:Green10, \(x) coalesce(x, 0.0)))

aux <- aux |>
    pivot_longer(cols = Con19:Green10) |>
    mutate(yr = gsub("[^0-9]", "", name),
           party = gsub("[0-9]", "", name),
           value = value / 100) |>
    pivot_wider(names_from = party,
                values_from = value,
                id_cols =c(ConstituencyName, ONSConstID, Country, yr)) |>
    mutate(earliest_wave = case_when(yr == "19" ~ 19,
                                     yr == "17" ~ 13,
                                     yr == "15" ~ 6,
                                     yr == "10" ~ 1),
           latest_wave = case_when(yr == "19" ~ 99,
                                   yr == "17" ~ 19,
                                   yr == "15" ~ 13,
                                   yr == "10" ~ 6))

aux <- aux |>
    dplyr::select(ConstituencyName,
                  ONSConstID,
                  Country,
                  Con_sh = Con,
                  Lab_sh = Lab,
                  LD_sh = LD,
                  UKIP_sh = UKIP,
                  Green_sh = Green,
                  Other_sh = Other,
                  earliest_wave,
                  latest_wave)

datw <- left_join(datw,
                  aux,
                  by = join_by(pcon == ConstituencyName,
                               wave >= earliest_wave,
                               wave < latest_wave))


### Now we're going to estimate a multinomial model where the vote
### shares are interacted with wave
### We're going to keep it simple, and restrict it to England

datw <- datw |>
    filter(Country == "England")


model_func <- function(df) {
    m <- multinom(cbind(DNV, Con, Lab, LDem, `BP/Reform/UKIP`,
                        Green, DK, Other) ~ Con_sh + Lab_sh + LD_sh + Green_sh,
                  data = df,
                  trace = FALSE)
}

datn <- datw |>
    group_by(wave) |>
    nest() |>
    mutate(model = future_map(data, model_func))

comparison_func <- function(model) {
### For each model, work out the comparison setting LD_sh
    avg_comparisons(model,
                    variables = list(LD_sh = \(x) data.frame(low = x, high = x + 1/100))) |>
        as.data.frame() |>
        filter(group == "LDem")

}

datn <- datn |>
    mutate(obj = future_map(model, comparison_func))

plot_df <- datn |>
    dplyr::select(wave, obj) |>
    unnest(cols = obj)


### Convert waves into dates
plot_df <- plot_df |>
    mutate(wave_date = dplyr::recode(wave,
                                     `25` = "2023-05-01",
                                     `24` = "2022-12-01",
                                     `23` = "2022-05-01",
                                     `22` = "2021-11-01",
                                     `21` = "2021-05-01",
                                     `20` = "2020-06-01",
                                     `19` = "2019-12-01",
                                     `18` = "2019-11-15",
                                     `17` = "2019-11-01",
                                     `16` = "2019-05-01",
                                     `15` = "2019-03-01",
                                     `14` = "2018-05-01",
                                     `13` = "2017-06-01",
                                     `12` = "2017-05-01",
                                     `11` = "2017-04-01",
                                     `10` = "2016-11-01",
                                     `9` = "2016-06-01",
                                     `8` = "2016-05-01",
                                     `7` = "2016-04-01",
                                     `6` = "2015-05-01",
                                     `5` = "2015-04-01",
                                     `4` = "2015-03-01",
                                     `3` = "2014-09-01",
                                     `2` = "2014-05-01",
                                     `1` = "2014-02-01"),
           wave_date = as.Date(wave_date))

p <- ggplot(plot_df, aes(x = wave_date,
                    y = estimate,
                    ymin = conf.low,
                    ymax = conf.high)) +
    geom_vline(xintercept = c(as.Date("2015-05-07"),
                              as.Date("2017-06-08"),
                              as.Date("2019-12-12")),
               linetype = 2,
               colour = "darkgrey") +
    geom_hline(yintercept = .01,
               linetype = 2,
               colour = "darkgrey") +
    annotate(geom = "text",
             x = as.Date("2014-01-01"),
             y = 0.01,
             hjust = 0,
             vjust = 0,
             label = "Model reproduces uniform national swing",
             colour = "darkgrey") +
    scale_x_date("Approximate date of fieldwork for each wave") +
    scale_y_continuous("Effect of a percentage point change\nin lagged Lib Dem vote share",
                       labels = scales::percent,
                       breaks = c(0, .0025, 0.005, 0.0075, 0.01)) +
    geom_pointrange(fill = "goldenrod",
                    colour = colorspace::darken("goldenrod"), shape = 24) +
    theme_bw()

ggsave(p,
       file = "consty_fx.png",
       width = 9, height = 6)
	### Load libraries
	library(tidyverse)
	library(haven) ## for Stata import
	library(readxl)
	library(marginaleffects) ## for easy predictions and contrasts
	library(nnet) ## for nnet::multinom
	library(furrr) ## to perform computations in parallel

	### Set fewer workers if you have fewer cores
	plan(multisession, workers = 12)

	### Project assumes you have the following two files in your working directory
	###
	### - BES2019_W25_Panel_v25.1.dta
	### - BES-2019-General-Election-results-file-v1.1.xlsx"
	dat <- read_dta("BES2019_W25_Panel_v25.1.dta",
	col_select = c(id,
	starts_with("pconW"),
	starts_with("generalElectionVote")))

	## Bit of playing around to get labels working properly
	dat <- dat \|>
	filter(!is.na(id)) \|>
	mutate_all(as_factor) \|>
	mutate_all(as.character)

	### We need to reshape the data to move from wide to long format
	### First longer so that each row is id/variable/{value+wave}
	datl <- dat \|>
	pivot_longer(cols = c(starts_with("pcon"),
	starts_with("generalElectionVote")),
	names_to = c("variable", "wave"),
	names_sep = "W") \|>
	mutate(wave = as.numeric(wave))

	### then wider so that each row is id/wave
	datw <- datl \|>
	pivot_wider(id_cols = c(id, wave),
	names_from = variable,
	values_from = value) \|>
	dplyr::select(id, wave, pcon, generalElectionVote)

	### tidy up some of the vote stuff, collapsing minor parties
	datw <- datw \|>
	mutate(vi = dplyr::recode(generalElectionVote,
	"An independent candidate" = "Other",
	"Brexit Party/Reform UK" = "BP/Reform/UKIP",
	"British National Party (BNP)" = "Other",
	"Change UK- The Independent Group" = "Other",
	"Conservative" = "Con",
	"Don't know" = "DK",
	"Green Party" = "Green",
	"I would/did not vote" = "DNV",
	"Labour"= "Lab",
	"Liberal Democrat" = "LDem",
	"Other" = "Other",
	"Plaid Cymru" = "PC",
	"Scottish National Party (SNP)" = "SNP",
	"United Kingdom Independence Party (UKIP)" = "BP/Reform/UKIP")) \|>
	dplyr::select(-generalElectionVote)

	### We now want to collapse this to one row per constituency/wave combination
	### with counts of the various dependent variables
	datw <- datw \|>
	filter(!is.na(vi)) \|>
	filter(vi != "SNP") \|>
	filter(vi != "PC") \|>
	group_by(wave, pcon, vi) \|>
	summarize(count = n(),
	.groups = "drop") \|>
	pivot_wider(names_from = vi,
	values_from = count,
	values_fill = 0L)

	###
	### Link with constituency information
	aux <- read_xlsx("BES-2019-General-Election-results-file-v1.1.xlsx") \|>
	dplyr::select(ConstituencyName, ONSConstID, Country,
	Con19, Lab19, LD19, SNP19, PC19, UKIP19, Green19, Brexit19, Other19,
	Con17, Lab17, LD17, SNP17, PC17, UKIP17, Green17, Other17,
	Con15, Lab15, LD15, SNP15, PC15, UKIP15, Green15, Other15,
	Con10, Lab10, LD10, SNP10, PC10, UKIP10, Green10) \|>
	mutate(across(Con19:Green10, \(x) coalesce(x, 0.0)))

	aux <- aux \|>
	pivot_longer(cols = Con19:Green10) \|>
	mutate(yr = gsub("[^0-9]", "", name),
	party = gsub("[0-9]", "", name),
	value = value / 100) \|>
	pivot_wider(names_from = party,
	values_from = value,
	id_cols =c(ConstituencyName, ONSConstID, Country, yr)) \|>
	mutate(earliest_wave = case_when(yr == "19" ~ 19,
	yr == "17" ~ 13,
	yr == "15" ~ 6,
	yr == "10" ~ 1),
	latest_wave = case_when(yr == "19" ~ 99,
	yr == "17" ~ 19,
	yr == "15" ~ 13,
	yr == "10" ~ 6))

	aux <- aux \|>
	dplyr::select(ConstituencyName,
	ONSConstID,
	Country,
	Con_sh = Con,
	Lab_sh = Lab,
	LD_sh = LD,
	UKIP_sh = UKIP,
	Green_sh = Green,
	Other_sh = Other,
	earliest_wave,
	latest_wave)

	datw <- left_join(datw,
	aux,
	by = join_by(pcon == ConstituencyName,
	wave >= earliest_wave,
	wave < latest_wave))


	### Now we're going to estimate a multinomial model where the vote
	### shares are interacted with wave
	### We're going to keep it simple, and restrict it to England

	datw <- datw \|>
	filter(Country == "England")


	model_func <- function(df) {
	m <- multinom(cbind(DNV, Con, Lab, LDem, `BP/Reform/UKIP`,
	Green, DK, Other) ~ Con_sh + Lab_sh + LD_sh + Green_sh,
	data = df,
	trace = FALSE)
	}

	datn <- datw \|>
	group_by(wave) \|>
	nest() \|>
	mutate(model = future_map(data, model_func))

	comparison_func <- function(model) {
	### For each model, work out the comparison setting LD_sh
	avg_comparisons(model,
	variables = list(LD_sh = \(x) data.frame(low = x, high = x + 1/100))) \|>
	as.data.frame() \|>
	filter(group == "LDem")

	}

	datn <- datn \|>
	mutate(obj = future_map(model, comparison_func))

	plot_df <- datn \|>
	dplyr::select(wave, obj) \|>
	unnest(cols = obj)


	### Convert waves into dates
	plot_df <- plot_df \|>
	mutate(wave_date = dplyr::recode(wave,
	`25` = "2023-05-01",
	`24` = "2022-12-01",
	`23` = "2022-05-01",
	`22` = "2021-11-01",
	`21` = "2021-05-01",
	`20` = "2020-06-01",
	`19` = "2019-12-01",
	`18` = "2019-11-15",
	`17` = "2019-11-01",
	`16` = "2019-05-01",
	`15` = "2019-03-01",
	`14` = "2018-05-01",
	`13` = "2017-06-01",
	`12` = "2017-05-01",
	`11` = "2017-04-01",
	`10` = "2016-11-01",
	`9` = "2016-06-01",
	`8` = "2016-05-01",
	`7` = "2016-04-01",
	`6` = "2015-05-01",
	`5` = "2015-04-01",
	`4` = "2015-03-01",
	`3` = "2014-09-01",
	`2` = "2014-05-01",
	`1` = "2014-02-01"),
	wave_date = as.Date(wave_date))

	p <- ggplot(plot_df, aes(x = wave_date,
	y = estimate,
	ymin = conf.low,
	ymax = conf.high)) +
	geom_vline(xintercept = c(as.Date("2015-05-07"),
	as.Date("2017-06-08"),
	as.Date("2019-12-12")),
	linetype = 2,
	colour = "darkgrey") +
	geom_hline(yintercept = .01,
	linetype = 2,
	colour = "darkgrey") +
	annotate(geom = "text",
	x = as.Date("2014-01-01"),
	y = 0.01,
	hjust = 0,
	vjust = 0,
	label = "Model reproduces uniform national swing",
	colour = "darkgrey") +
	scale_x_date("Approximate date of fieldwork for each wave") +
	scale_y_continuous("Effect of a percentage point change\nin lagged Lib Dem vote share",
	labels = scales::percent,
	breaks = c(0, .0025, 0.005, 0.0075, 0.01)) +
	geom_pointrange(fill = "goldenrod",
	colour = colorspace::darken("goldenrod"), shape = 24) +
	theme_bw()

	ggsave(p,
	file = "consty_fx.png",
	width = 9, height = 6)