### Analysis of Welsh Senedd constituency proposal
### Purpose of this code:
### - To generate pairwise average travel times between Westminster constituencies
### - To sample contiguous pairings of Westminster constituencies
### - To evaluate the travel times between pairings
### - To evaluate the compactness of pairings
### Inputs
### - Travel time data from https://data.ubdc.ac.uk/datasets/public-transport-travel-time-matrices-for-great-britain-ttm-2023
### - Shapefiles from https://geoportal.statistics.gov.uk/datasets/0d698c9712de4afcac9377367d831c1a_0/explore
### - lookups from https://geoportal.statistics.gov.uk/
### Outputs
### - Sampled proposals
### - Map
### - Measures
### Load libraries
library(tidyverse)
library(arrow)
library(sf)
library(here)
library(spdep)
library(parallel)
library(ggrepel)
library(igraph)
library(combinat)
library(future)
library(future.apply)
library(tictoc)
library(redistmetrics)
here::i_am("analysis_v2.R")
set.seed(2843)
### Set up parallel workers: a PSOCK cluster for parSapply() below, and a
### future plan so that future_lapply() actually runs in parallel
num_cores <- parallel::detectCores() - 2
cl <- parallel::makeCluster(num_cores)
future::plan(multisession, workers = num_cores)
### Set up locations of different objects
ttm_path <- "~/Downloads/ttm/LSOA/LSOA records/ttm_pt"
oa11_wmin24_path <- "Output_area_(2011)_to_future_Parliamentary_Constituencies_Lookup_in_England_and_Wales.csv"
oa11_lsoa11_path <- "Postcode_to_Output_Area_to_Lower_Layer_Super_Output_Area_to_Middle_Layer_Super_Output_Area_to_Local_Authority_District_November_2018_Lookup_in_the_UK.csv"
if (!file.exists(oa11_wmin24_path) || !file.exists(oa11_lsoa11_path)) {
    stop("You need to download the appropriate lookups and put them in this folder")
}
### locations for different temporary objects
pairwise_ttm_path <- here::here("working", "pairwise_ttms.rds")
### #############################################################################
### Part 1: travel times
### #############################################################################
if (file.exists(pairwise_ttm_path)) {
ttm <- readRDS(pairwise_ttm_path)
} else {
infiles <- list.files(ttm_path,
                      pattern = "\\.parquet$",
                      full.names = TRUE)
### Keep only Welsh LSOAs (codes beginning "W") and AM departures
read_and_filter <- function(file) {
    read_parquet(file) |>
        filter(grepl("^W", from_id)) |>
        filter(grepl("^W", to_id)) |>
        filter(time_of_day == "am") |>
        dplyr::select(from_id,
                      to_id,
                      travel_time_p25)
}
### This takes a couple of minutes
ttm <- lapply(infiles, read_and_filter) |>
    bind_rows()
### Travel times over 150 minutes are stored as missing in the source data;
### recode the NAs to 151 so they count as "more than 150 minutes"
ttm <- ttm |>
    mutate(travel_time_p25 = coalesce(travel_time_p25, 151))
### We now need to move between LSOA 2011 level and 2024 Westminster constituency level
### To do this we need to drop down to OA 2011 level and reaggregate
oa11_wmin24 <- read_csv(oa11_wmin24_path,
                        col_select = c(OA11CD, PCON25CD)) |>
    filter(grepl("^W", PCON25CD))
### Assign each OA2011 to an LSOA2011
oa11_lsoa11 <- read_csv(oa11_lsoa11_path,
                        col_select = c(oa11cd, lsoa11cd)) |>
    distinct() |>
    filter(grepl("^W", oa11cd))
### Join the two data sources
oa11_wmin24 <- left_join(oa11_wmin24,
                         oa11_lsoa11,
                         by = join_by(OA11CD == oa11cd))
### Pick the most common Westminster constituency for each LSOA
### (despite the name, ties are broken deterministically by table() order)
most_common_or_random <- function(x) {
    names(sort(table(x), decreasing = TRUE))[1]
}
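### Quick illustration, using two constituency codes that appear later in
### this script:
### most_common_or_random(c("W07000081", "W07000081", "W07000107"))
### returns "W07000081" - the modal value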
lsoa11_wmin24 <- oa11_wmin24 |>
    group_by(lsoa11cd) |>
    summarize(PCON25CD = most_common_or_random(PCON25CD))
### Add this on to the TTM data
ttm <- left_join(ttm,
                 lsoa11_wmin24 |> dplyr::select(lsoa11cd, from_pcon = PCON25CD),
                 by = join_by(from_id == lsoa11cd))
ttm <- left_join(ttm,
                 lsoa11_wmin24 |> dplyr::select(lsoa11cd, to_pcon = PCON25CD),
                 by = join_by(to_id == lsoa11cd))
### We now want to create all pairings of Welsh constituencies (including
### the constituencies whose pairs are pre-determined by contiguity, since
### those pairs still need travel times later)
constituencies <- lsoa11_wmin24 |>
    filter(grepl("^W", PCON25CD)) |>
    pull(PCON25CD) |>
    unique()
### Keep each unordered pair once (A <= B); the A == B self-pairs are never
### used later, but are harmless
combinations <- expand.grid(A = constituencies,
                            B = constituencies,
                            stringsAsFactors = FALSE) |>
    filter(A <= B)
### Now write a function to interrogate the ttm data: the median (over LSOA
### pairs) of the p25 travel time from constituencies x to constituencies y
get_ttm <- function(x, y) {
    ttm |>
        filter(from_pcon %in% x) |>
        filter(to_pcon %in% y) |>
        pull(travel_time_p25) |>
        median(na.rm = TRUE)
}
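### Usage sketch (two codes from the Boundary Commission list below):
### get_ttm("W07000081", "W07000107") gives the median AM travel time in
### minutes from Aberafan Maesteg to Rhondda and Ogmore; note it is
### one-directional, matching the A <= B grid above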
combinations <- combinations |>
    mutate(avg_travel_time = map2_dbl(A, B, get_ttm))
### overwrite ttm
ttm <- combinations
rm(combinations, lsoa11_wmin24, oa11_wmin24)
saveRDS(ttm, file = pairwise_ttm_path)
}
### #############################################################################
### Part 2: generating contiguous pairings
### #############################################################################
seat_map <- read_sf("PCON_JULY_2024_UK_BFC.shp")
wales_seats <- seat_map %>%
    filter(str_detect(PCON24CD, "^W")) %>%
    # Drop constituencies that have pre-determined pairs
    filter(!(PCON24CD %in% c(
        "W07000112", # Ynys Môn
        "W07000083", # Bangor Aberconwy
        "W07000094", # Clwyd East
        "W07000095", # Clwyd North
        "W07000082", # Alyn and Deeside
        "W07000111", # Wrexham
        "W07000096", # Dwyfor Meirionnydd
        "W07000102"  # Montgomeryshire and Glyndwr
    ))) %>%
    mutate(constituency_index = row_number())
# Note there are two other pairings that weren't previously excluded:
# Alyn and Deeside can only be paired with Wrexham given the Clwyd pairing, and
# Dwyfor Meirionnydd can only be paired with Montgomeryshire and Glyndwr.
# At first glance it looks like Dwyfor Meirionnydd could be paired with
# Ceredigion Preseli, but there's a river between them with no bridge directly
# linking the two.
# (This is apparently captured in the shapefile - it took me a while to figure
# out why they couldn't be paired!)
## Calculate contiguity between constituencies
# Note: there's some disagreement between st_relate() and st_touches() about
# whether all four of the constituencies that meet at the nexus of Neath and
# Swansea East, Merthyr Tydfil and Aberdare, Rhondda and Ogmore, and Aberafan
# Maesteg touch each other. st_touches() thinks the diagonally opposite
# constituencies (e.g. Neath and Swansea East + Rhondda and Ogmore) 'touch',
# but st_relate() doesn't. The st_relate() version looks more sensible to me,
# but could perhaps be more precise about what counts as contiguous.
### This DE-9IM pattern means "the interiors do not intersect, but the
### boundaries share a one-dimensional intersection" - i.e. a shared edge
### rather than a single corner point
contiguity_matrix <- st_relate(wales_seats, pattern = "F***1****")
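### A toy check of this pattern, with made-up unit squares (sq() is a
### throwaway helper for illustration, not part of the analysis): squares
### sharing an edge match, squares meeting only at a corner don't
sq <- function(x0, y0) {
    st_polygon(list(rbind(c(x0, y0), c(x0 + 1, y0), c(x0 + 1, y0 + 1),
                          c(x0, y0 + 1), c(x0, y0))))
}
st_relate(st_sfc(sq(0, 0)), st_sfc(sq(1, 0)), pattern = "F***1****") # shared edge: match
st_relate(st_sfc(sq(0, 0)), st_sfc(sq(1, 1)), pattern = "F***1****") # corner only: no match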
## Create a graph from the contiguity matrix
contiguity_graph <- graph_from_adjacency_matrix(as.matrix(contiguity_matrix),
                                                mode = "undirected")
## Find all contiguous pairs of constituencies
pairs <- as_data_frame(contiguity_graph, what = "edges") %>% as_tibble()
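### pairs is an edge list of contiguous constituencies: from and to are
### igraph vertex ids, which coincide here with constituency_index in
### wales_seats because vertices were created in row order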
# Count number of pairs each constituency appears in and arrange so least common comes at the top
# Note - I *think* if you start with the seats with the fewest options you're less likely to end up with
# situations where you run out of possible pairings later on, which seems more efficient, but I'm just assuming that
pair_count <- bind_rows(
    pairs %>% count(from) %>% rename("constituency" = "from"),
    pairs %>% count(to) %>% rename("constituency" = "to")) %>%
    group_by(constituency) %>%
    summarise(n = sum(n)) %>%
    arrange(n)
# Function that takes a set of constituency pairings, finds the remaining pairs
# that don't contain seats already in the set, chooses the constituency with
# the fewest potential pairings, and creates new sets of pairings by adding
# each of its potential pairings to the initial set.
add_next_seat <- function(sets, this_set) {
    # Identify constituencies used in the current set
    constituencies_in_set <- c(sets[[this_set]]$from, sets[[this_set]]$to) %>%
        unique()
    # Find pairs that don't contain constituencies already in the set
    candidate_pairs <- pairs %>%
        filter(!(from %in% constituencies_in_set)) %>%
        filter(!(to %in% constituencies_in_set))
    # Check that there is at least one candidate pair, and if so add all
    # pairings to the initial set
    if (nrow(candidate_pairs) > 0) {
        # Count constituency appearances
        candidate_pair_count <- bind_rows(
            candidate_pairs %>% count(from) %>% rename("constituency" = "from"),
            candidate_pairs %>% count(to) %>% rename("constituency" = "to")) %>%
            group_by(constituency) %>%
            summarise(n = sum(n)) %>%
            arrange(n)
        # Pull out pairs to be added
        next_pairs <- candidate_pairs %>%
            filter(from == candidate_pair_count$constituency[1] |
                   to == candidate_pair_count$constituency[1])
        # Turn into a list of one-row tibbles
        updating_set <- next_pairs %>%
            rowwise() %>%
            group_split()
        # Add each pair to the initial set, returning a list of updated sets
        updated_sets <- map(seq_along(updating_set), function(this_one) {
            sets[[this_set]] %>%
                bind_rows(updating_set[[this_one]])
        })
        return(updated_sets)
    }
    # If no candidate pairs remain, return NULL (implicitly); flatten() in
    # the search loop below silently drops these dead-end branches
}
# Create the starting set of pairings, using the constituency with the fewest
# potential pairs (ties broken however arrange() breaks them by default)
working_set <- pairs %>%
    filter(from == pair_count$constituency[1] | to == pair_count$constituency[1]) %>%
    rowwise() %>%
    group_split()
# Then take the previous list of pairings and map add_next_seat() over all of
# them, creating new sets of pairings; repeat until all 12 pairs have been
# formed (+ the 4 pre-determined pairs, for 16 in total)
current_pairs <- 1
while (current_pairs < 12) {
    working_set <- map(seq_along(working_set),
                       add_next_seat,
                       sets = working_set) %>%
        flatten()
    current_pairs <- current_pairs + 1
}
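### working_set is now a list of candidate pairings, each a 12-row tibble of
### (from, to) constituency_index pairs covering the 24 remaining seats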
# Check for duplicate sets and remove them
remove_duplicate_sets <- function(set_to_check) {
    # Sort each set so identical sets compare equal regardless of row order
    sorted_sets <- future_lapply(set_to_check, function(tbl) {
        tbl %>% arrange(from, to)
    })
    # Filter out duplicates
    unique_sets <- sorted_sets %>% unique()
    return(unique_sets)
}
# Apparently there aren't any duplicates, but worth checking anyway!
all_potential_pairings <- remove_duplicate_sets(working_set)
# Add identifiers etc
constituency_codes <- wales_seats %>%
    as_tibble() %>%
    select(constituency_index, PCON24CD)
pre_set_pairs <- tibble(
    PCON24CD = c("W07000112", # Ynys Môn
                 "W07000083", # Bangor Aberconwy
                 "W07000094", # Clwyd East
                 "W07000095", # Clwyd North
                 "W07000082", # Alyn and Deeside
                 "W07000111", # Wrexham
                 "W07000096", # Dwyfor Meirionnydd
                 "W07000102"), # Montgomeryshire and Glyndwr
    pair = c(1, 1, 2, 2, 3, 3, 4, 4))
all_pairs <- lapply(all_potential_pairings, function(set) {
    pre_set_pairs %>%
        bind_rows(
            set %>%
                mutate(pair = 5:16) %>%
                pivot_longer(-pair, values_to = "constituency_index") %>%
                left_join(constituency_codes, by = join_by(constituency_index))) %>%
        select(PCON24CD, pair)
})
### Transform this into the canonical form used elsewhere: a list of
### matrices, each sorted alphabetically within rows and then by the first
### column
all_pairs.bak <- all_pairs
all_pairs <- lapply(all_pairs, function(x) {
    x <- x |>
        group_by(pair) |>
        reframe(A = PCON24CD[1], B = PCON24CD[2]) |>
        as.matrix()
    ### Drop dimnames so that as.data.frame() later gives default V1/V2
    ### columns and identical() comparisons aren't tripped up by names
    out <- unname(x[, c("A", "B")])
    ### Sort within each row
    out <- t(apply(out, 1, sort))
    ### Then order the rows by the first column
    out[order(out[, 1]), ]
})
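### Each element of all_pairs is now an unnamed 16 x 2 character matrix in
### canonical form, e.g. (later rows depend on the sampled pairing):
###      [,1]          [,2]
### [1,] "W07000081"   (partner of Aberafan Maesteg in this pairing)
### [2,] "W07000082"   "W07000111"  (the pre-set Alyn and Deeside / Wrexham pair)
### ...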
dat <- tibble(id = seq_along(all_pairs), pairing = all_pairs)
### #############################################################################
### Part 3: evaluating pairings by travel time
### #############################################################################
### Median of the pairwise constituency travel times across a pairing
avg_ttm <- function(pairing) {
    as.data.frame(pairing) |>
        left_join(ttm, by = join_by(V1 == A, V2 == B)) |>
        pull(avg_travel_time) |>
        median(na.rm = TRUE)
}
dat <- dat |>
    mutate(avg_travel_time = map_dbl(pairing, avg_ttm))
### #############################################################################
### Part 4: evaluating pairings by compactness
### #############################################################################
get_compactness <- function(pairing) {
    require(tidyverse)
    require(sf)
    require(redistmetrics)
    local <- pairing |>
        as.data.frame() |>
        mutate(grp = letters[seq_len(n())])
    local_map <- left_join(wales_seats,
                           local,
                           by = join_by(PCON24CD == V1))
    local_map <- left_join(local_map,
                           local,
                           by = join_by(PCON24CD == V2),
                           suffix = c(".v1", ".v2"))
    local_map <- local_map |>
        mutate(grp = ifelse(is.na(grp.v1),
                            grp.v2,
                            grp.v1))
    local_map <- local_map |>
        mutate(grp = factor(grp),
               grp_num = as.numeric(grp))
    mean(comp_polsby(plans = local_map$grp_num, shp = local_map))
}
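### comp_polsby() computes the Polsby-Popper score: for a district D,
### PP(D) = 4 * pi * area(D) / perimeter(D)^2, so a circle scores 1 and long,
### thin shapes score close to 0. A minimal hand-rolled sketch for a single
### projected sf geometry, for intuition only (polsby_popper() is
### illustrative; the analysis itself uses redistmetrics::comp_polsby()):
polsby_popper <- function(geom) {
    perimeter <- sf::st_length(sf::st_cast(geom, "MULTILINESTRING"))
    as.numeric(4 * pi * sf::st_area(geom) / perimeter^2)
}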
clusterExport(cl, c("wales_seats"))
dat$compactness <- parSapply(cl, dat$pairing, get_compactness)
stopCluster(cl)
saveRDS(dat, file = "output.rds")
### #############################################################################
### Part 5: plotting things
### #############################################################################
### Work out what the Boundary Commission proposal is
bc <- matrix(c(
    ## Ynys Môn and Bangor Aberconwy (pre-determined; included so that bc has
    ## the same 16 rows as the sampled pairings and identical() can match)
    "W07000112", "W07000083",
    ## Clwyd East and Clwyd North (pre-determined)
    "W07000094", "W07000095",
    ## Alyn, Deeside and Wrexham
    "W07000082", "W07000111",
    ## Dwyfor Meirionnydd, Montgomeryshire and Glyndwr
    "W07000096", "W07000102",
    ## Ceredigion and Pembrokeshire
    "W07000093", "W07000100",
    ## Carmarthenshire
    "W07000087", "W07000098",
    ## Swansea West and Gower
    "W07000108", "W07000097",
    ## Brecon, Radnor, Neath and Swansea East
    "W07000085", "W07000103",
    ## Aberafan Maesteg, Rhondda, Ogmore
    "W07000081", "W07000107",
    ## Merthyr Tydfil, Aberdare and Pontypridd
    "W07000099", "W07000106",
    ## Blaenau Gwent, Rhymney and Caerphilly
    "W07000084", "W07000088",
    ## Monmouthshire and Torfaen
    "W07000101", "W07000109",
    ## Newport and Islwyn
    "W07000104", "W07000105",
    ## Cardiff East and North
    "W07000089", "W07000090",
    ## Cardiff West, South and Penarth
    "W07000092", "W07000091",
    ## Vale of Glamorgan and Bridgend
    "W07000110", "W07000086"),
    nrow = 16,
    ncol = 2,
    byrow = TRUE)
### Sort it the same way the other things have been sorted
### Sort them across the rows
bc <- t(apply(bc, 1, sort))
### Sort them by order on the first column
bc <- bc[order(bc[,1]),]
dat$is_bc <- sapply(dat$pairing, function(x) identical(x, bc))
ggplot(dat, aes(x = compactness, y = avg_travel_time, colour = is_bc)) +
    geom_point() +
    scale_x_continuous("Compactness\n(Polsby-Popper metric, bigger is better)") +
    scale_y_continuous("Average travel time\n(minutes, smaller is better)") +
    scale_colour_manual("",
                        guide = "none",
                        values = c("darkgrey", "red")) +
    theme_bw()
### #############################################################################
### Part 6: mapping things
### #############################################################################
report_pairing <- function(pairing) {
    local <- pairing |>
        as.data.frame() |>
        mutate(grp = letters[seq_len(n())])
    lu <- wales_seats |>
        st_drop_geometry() |>
        dplyr::select(PCON24CD, PCON24NM)
    local <- left_join(local,
                       lu,
                       by = join_by(V1 == PCON24CD))
    local <- left_join(local,
                       lu,
                       by = join_by(V2 == PCON24CD),
                       suffix = c(".v1", ".v2"))
    local <- local |>
        rowwise() |>
        mutate(combined_name = ifelse(PCON24NM.v1 < PCON24NM.v2,
                                      paste(PCON24NM.v1, PCON24NM.v2, sep = "; "),
                                      paste(PCON24NM.v2, PCON24NM.v1, sep = "; "))) |>
        ungroup() |>
        arrange(combined_name) |>
        pull(combined_name)
    ### The pre-determined constituencies aren't in wales_seats, so their
    ### pairs come through as NA (silently dropped by sort() below); add all
    ### four pre-determined pairings back by hand
    knowns <- c("Bangor Aberconwy; Ynys Môn",
                "Clwyd East; Clwyd North",
                "Alyn and Deeside; Wrexham",
                "Dwyfor Meirionnydd; Montgomeryshire and Glyndwr")
    local <- c(local, knowns)
    local <- sort(local)
    local <- local |>
        paste0(collapse = "\n")
    return(local)
}
plot_pairing <- function(pairing) {
    local <- pairing |>
        as.data.frame() |>
        mutate(grp = letters[seq_len(n())])
    ### Note: the eight pre-determined constituencies were dropped from
    ### wales_seats above, so they don't appear on the map
    local_map <- left_join(wales_seats,
                           local,
                           by = join_by(PCON24CD == V1))
    local_map <- left_join(local_map,
                           local,
                           by = join_by(PCON24CD == V2),
                           suffix = c(".v1", ".v2"))
    local_map <- local_map |>
        mutate(grp = ifelse(is.na(grp.v1),
                            grp.v2,
                            grp.v1))
    if (FALSE) {
        ### Disabled alternative: repelled labels via ggrepel
        ggplot() +
            geom_sf(data = local_map, aes(fill = grp)) +
            ggrepel::geom_label_repel(
                data = local_map,
                aes(label = PCON24NM, geometry = geometry),
                stat = "sf_coordinates",
                size = 1.5
            ) +
            scale_fill_discrete(guide = "none") +
            theme_minimal()
    } else {
        ggplot() +
            geom_sf(data = local_map, aes(fill = grp)) +
            geom_sf_label(
                data = local_map,
                aes(label = str_wrap(PCON24NM, 20)),
                size = 1.5
            ) +
            scale_fill_discrete(guide = "none") +
            theme_minimal()
    }
}
### Report details of the most compact pairing (which also happens to do well
### on travel time)
best <- which.max(dat$compactness)
plot_pairing(dat$pairing[[best]])
cat(report_pairing(dat$pairing[[best]]))