# johnburnmurdoch/USoc_driving_licence.R

## USoc_driving_licence.R
needs(sjlabelled, tidyverse, haven, magrittr)

# Root folder of the extracted UKDA-6614 Stata download.
# Change this single path if the data lives somewhere else.
usoc_dir <- "~/Downloads/UKDA-6614-stata/stata11_se"

# Read the individual-response (`indresp`) file for one wave.
#   wave: wave number, 1-8. The file for wave N sits in `ukhls_wN/` and is
#         prefixed with the N-th lowercase letter (wave 1 = "a", wave 8 = "h").
# Returns whatever `read_dta()` returns for that file.
read_indresp <- function(wave) {
  read_dta(
    file.path(
      usoc_dir,
      paste0("ukhls_w", wave),
      paste0(letters[wave], "_indresp.dta")
    ),
    encoding = "latin1"
  )
}

# Load wave 8
USoc_indresp_8 <- read_indresp(8)

# Load all other waves
USoc_indresp_1 <- read_indresp(1)
USoc_indresp_2 <- read_indresp(2)
USoc_indresp_3 <- read_indresp(3)
USoc_indresp_4 <- read_indresp(4)
USoc_indresp_5 <- read_indresp(5)
USoc_indresp_6 <- read_indresp(6)
USoc_indresp_7 <- read_indresp(7)

# Join latest wave full dataset with driving licence variable of all previous waves.
# The weighted licence x vote counts are built once here and reused by both
# tables below, so the seven-way join and the collapse only run a single time.
licence_vote_counts <- USoc_indresp_8 %>%
  dplyr::select(pidp, h_drive, h_vote8, h_indinub_xw) %>%
  # `pidp` (person id) is the only shared column; name it explicitly so the
  # join key cannot silently change if more columns are selected later
  left_join(USoc_indresp_1 %>% dplyr::select(pidp, a_drive), by = "pidp") %>%
  left_join(USoc_indresp_2 %>% dplyr::select(pidp, b_drive), by = "pidp") %>%
  left_join(USoc_indresp_3 %>% dplyr::select(pidp, c_drive), by = "pidp") %>%
  left_join(USoc_indresp_4 %>% dplyr::select(pidp, d_drive), by = "pidp") %>%
  left_join(USoc_indresp_5 %>% dplyr::select(pidp, e_drive), by = "pidp") %>%
  left_join(USoc_indresp_6 %>% dplyr::select(pidp, f_drive), by = "pidp") %>%
  left_join(USoc_indresp_7 %>% dplyr::select(pidp, g_drive), by = "pidp") %>%
  # Use party names instead of codes
  mutate(h_vote8 = as_label(h_vote8)) %>%
  # Pivot long, so each person ID is now associated with a series of driving
  # licence data points, one for each wave, in a column. The wave columns are
  # picked by name (`*_drive`) rather than by position.
  gather(wave, drive, ends_with("_drive")) %>%
  # Group by person id, weight and vote
  group_by(pidp, h_indinub_xw, h_vote8) %>%
  # For each person: "Yes" (1) if "Yes" appears in any wave, "No" (2) if "Yes"
  # never appears but "No" does, and NA if no wave holds a valid answer.
  # (I'm naively assuming here that nobody has lost their licence.)
  # Previously every person without a "Yes" was coded "No", including people
  # with no valid answer at all, so the missing-data filter never removed anyone.
  summarise(
    drive = if (1 %in% drive) 1 else if (2 %in% drive) 2 else NA_real_
  ) %>%
  ungroup() %>%
  # Filter out missing data for driving licence before any shares are computed,
  # so it sits in neither the numerator nor the denominator
  filter(!is.na(drive)) %>%
  # Tally up all combos of driving licence x GE vote, using 'h_indinub_xw' weights (Per Sturgis and Jennings https://www.sciencedirect.com/science/article/pii/S026137941930071X?via%3Dihub)
  count(drive, h_vote8, wt = h_indinub_xw) %>%
  # Recode driving licence from codes to names
  mutate(drive = c("Yes", "No")[drive])

# Driving licence by vote: within each GE vote, the weighted share with/without a licence
licence_vote_counts %>%
  # Group by GE vote
  group_by(h_vote8) %>%
  # Convert weighted numbers to shares
  mutate(share = n / sum(n) * 100) %>%
  # Drop votes whose weighted total is zero (share would be NaN)
  filter(is.finite(share)) %>%
  # Filter out parties with tiny samples, and missing GE vote data.
  # Note: `n` is a weighted count here, so 96 is a threshold on weighted size.
  filter(sum(n) > 96 & !h_vote8 %in% c("inapplicable", "refusal")) %>%
  mutate(n = sum(n)) %>%
  ungroup() %>%
  # Spread wide again
  spread(drive, share) %>%
  # Sort by % with a licence
  arrange(desc(Yes))

# As above, but doing vote by driving licence, instead of driving licence by vote
licence_vote_counts %>%
  filter(!h_vote8 %in% c("inapplicable", "refusal")) %>%
  group_by(drive) %>%
  mutate(share = n / sum(n) * 100) %>%
  ungroup() %>%
  View()
	needs(sjlabelled, tidyverse, haven, magrittr)

	# Root folder of the extracted UKDA-6614 Stata download.
	# Change this single path if the data lives somewhere else.
	usoc_dir <- "~/Downloads/UKDA-6614-stata/stata11_se"

	# Read the individual-response (`indresp`) file for one wave.
	#   wave: wave number, 1-8. The file for wave N sits in `ukhls_wN/` and is
	#         prefixed with the N-th lowercase letter (wave 1 = "a", wave 8 = "h").
	# Returns whatever `read_dta()` returns for that file.
	read_indresp <- function(wave) {
	  read_dta(
	    file.path(
	      usoc_dir,
	      paste0("ukhls_w", wave),
	      paste0(letters[wave], "_indresp.dta")
	    ),
	    encoding = "latin1"
	  )
	}

	# Load wave 8
	USoc_indresp_8 <- read_indresp(8)

	# Load all other waves
	USoc_indresp_1 <- read_indresp(1)
	USoc_indresp_2 <- read_indresp(2)
	USoc_indresp_3 <- read_indresp(3)
	USoc_indresp_4 <- read_indresp(4)
	USoc_indresp_5 <- read_indresp(5)
	USoc_indresp_6 <- read_indresp(6)
	USoc_indresp_7 <- read_indresp(7)

	# Join latest wave full dataset with driving licence variable of all previous waves.
	# The weighted licence x vote counts are built once here and reused by both
	# tables below, so the seven-way join and the collapse only run a single time.
	licence_vote_counts <- USoc_indresp_8 %>%
	  dplyr::select(pidp, h_drive, h_vote8, h_indinub_xw) %>%
	  # `pidp` (person id) is the only shared column; name it explicitly so the
	  # join key cannot silently change if more columns are selected later
	  left_join(USoc_indresp_1 %>% dplyr::select(pidp, a_drive), by = "pidp") %>%
	  left_join(USoc_indresp_2 %>% dplyr::select(pidp, b_drive), by = "pidp") %>%
	  left_join(USoc_indresp_3 %>% dplyr::select(pidp, c_drive), by = "pidp") %>%
	  left_join(USoc_indresp_4 %>% dplyr::select(pidp, d_drive), by = "pidp") %>%
	  left_join(USoc_indresp_5 %>% dplyr::select(pidp, e_drive), by = "pidp") %>%
	  left_join(USoc_indresp_6 %>% dplyr::select(pidp, f_drive), by = "pidp") %>%
	  left_join(USoc_indresp_7 %>% dplyr::select(pidp, g_drive), by = "pidp") %>%
	  # Use party names instead of codes
	  mutate(h_vote8 = as_label(h_vote8)) %>%
	  # Pivot long, so each person ID is now associated with a series of driving
	  # licence data points, one for each wave, in a column. The wave columns are
	  # picked by name (`*_drive`) rather than by position.
	  gather(wave, drive, ends_with("_drive")) %>%
	  # Group by person id, weight and vote
	  group_by(pidp, h_indinub_xw, h_vote8) %>%
	  # For each person: "Yes" (1) if "Yes" appears in any wave, "No" (2) if "Yes"
	  # never appears but "No" does, and NA if no wave holds a valid answer.
	  # (I'm naively assuming here that nobody has lost their licence.)
	  # Previously every person without a "Yes" was coded "No", including people
	  # with no valid answer at all, so the missing-data filter never removed anyone.
	  summarise(
	    drive = if (1 %in% drive) 1 else if (2 %in% drive) 2 else NA_real_
	  ) %>%
	  ungroup() %>%
	  # Filter out missing data for driving licence before any shares are computed,
	  # so it sits in neither the numerator nor the denominator
	  filter(!is.na(drive)) %>%
	  # Tally up all combos of driving licence x GE vote, using 'h_indinub_xw' weights (Per Sturgis and Jennings https://www.sciencedirect.com/science/article/pii/S026137941930071X?via%3Dihub)
	  count(drive, h_vote8, wt = h_indinub_xw) %>%
	  # Recode driving licence from codes to names
	  mutate(drive = c("Yes", "No")[drive])

	# Driving licence by vote: within each GE vote, the weighted share with/without a licence
	licence_vote_counts %>%
	  # Group by GE vote
	  group_by(h_vote8) %>%
	  # Convert weighted numbers to shares
	  mutate(share = n / sum(n) * 100) %>%
	  # Drop votes whose weighted total is zero (share would be NaN)
	  filter(is.finite(share)) %>%
	  # Filter out parties with tiny samples, and missing GE vote data.
	  # Note: `n` is a weighted count here, so 96 is a threshold on weighted size.
	  filter(sum(n) > 96 & !h_vote8 %in% c("inapplicable", "refusal")) %>%
	  mutate(n = sum(n)) %>%
	  ungroup() %>%
	  # Spread wide again
	  spread(drive, share) %>%
	  # Sort by % with a licence
	  arrange(desc(Yes))

	# As above, but doing vote by driving licence, instead of driving licence by vote
	licence_vote_counts %>%
	  filter(!h_vote8 %in% c("inapplicable", "refusal")) %>%
	  group_by(drive) %>%
	  mutate(share = n / sum(n) * 100) %>%
	  ungroup() %>%
	  View()