Skip to content

Instantly share code, notes, and snippets.

@johnburnmurdoch
Created October 13, 2019 14:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnburnmurdoch/0054858c00a2bc778c77e8586e9d234c to your computer and use it in GitHub Desktop.
Save johnburnmurdoch/0054858c00a2bc778c77e8586e9d234c to your computer and use it in GitHub Desktop.
needs(sjlabelled, tidyverse, haven, magrittr)
# Read a single Stata file of individual responses. Every wave is read with
# the same latin1 encoding, so that choice lives in one place.
read_indresp <- function(path) {
  read_dta(path, encoding = "latin1")
}

# Waves 1 to 7 (file prefixes a_ to g_)
USoc_indresp_1 <- read_indresp("~/Downloads/UKDA-6614-stata/stata11_se/ukhls_w1/a_indresp.dta")
USoc_indresp_2 <- read_indresp("~/Downloads/UKDA-6614-stata/stata11_se/ukhls_w2/b_indresp.dta")
USoc_indresp_3 <- read_indresp("~/Downloads/UKDA-6614-stata/stata11_se/ukhls_w3/c_indresp.dta")
USoc_indresp_4 <- read_indresp("~/Downloads/UKDA-6614-stata/stata11_se/ukhls_w4/d_indresp.dta")
USoc_indresp_5 <- read_indresp("~/Downloads/UKDA-6614-stata/stata11_se/ukhls_w5/e_indresp.dta")
USoc_indresp_6 <- read_indresp("~/Downloads/UKDA-6614-stata/stata11_se/ukhls_w6/f_indresp.dta")
USoc_indresp_7 <- read_indresp("~/Downloads/UKDA-6614-stata/stata11_se/ukhls_w7/g_indresp.dta")

# Wave 8 (file prefix h_), the wave the tabulations below are built on
USoc_indresp_8 <- read_indresp("~/Downloads/UKDA-6614-stata/stata11_se/ukhls_w8/h_indresp.dta")
# Weighted counts of "ever reported a driving licence" x GE vote ----
#
# Built once and reused by both tables below (previously the whole join /
# reshape / per-person summarise was duplicated for each table).
#
# Steps:
#   1. Take the wave 8 respondents (id, licence answer, vote, weight) and
#      attach each earlier wave's licence answer by person id.
#   2. Replace vote codes with their party-name labels.
#   3. Pivot long so each person has one row per wave's licence answer.
#   4. Collapse to one licence value per person:
#        1 ("Yes") if any wave recorded 1 (naively assuming nobody has lost
#          their licence),
#        2 ("No")  if no wave recorded 1 but at least one recorded 2,
#        NA        if no wave holds a valid 1/2 answer.
#      The NA case used to be folded into "No", which made the later
#      `drive > 0` missing-data filter a no-op; those people are now dropped.
#   5. Tally licence x vote using the 'h_indinub_xw' weights (per Sturgis and
#      Jennings
#      https://www.sciencedirect.com/science/article/pii/S026137941930071X?via%3Dihub).
#   6. Recode licence to names and drop respondents with no usable GE vote.
licence_vote_counts <- USoc_indresp_8 %>%
  dplyr::select(pidp, h_drive, h_vote8, h_indinub_xw) %>%
  # Explicit join key: avoids the "Joining, by = ..." messages and guards
  # against accidentally joining on any other shared column
  left_join(USoc_indresp_1 %>% dplyr::select(pidp, a_drive), by = "pidp") %>%
  left_join(USoc_indresp_2 %>% dplyr::select(pidp, b_drive), by = "pidp") %>%
  left_join(USoc_indresp_3 %>% dplyr::select(pidp, c_drive), by = "pidp") %>%
  left_join(USoc_indresp_4 %>% dplyr::select(pidp, d_drive), by = "pidp") %>%
  left_join(USoc_indresp_5 %>% dplyr::select(pidp, e_drive), by = "pidp") %>%
  left_join(USoc_indresp_6 %>% dplyr::select(pidp, f_drive), by = "pidp") %>%
  left_join(USoc_indresp_7 %>% dplyr::select(pidp, g_drive), by = "pidp") %>%
  # Use party names instead of codes
  mutate(h_vote8 = as_label(h_vote8)) %>%
  # Select the licence columns by name rather than by position, so the
  # reshape keeps working if the selected columns are reordered or extended
  gather(wave, drive, ends_with("_drive")) %>%
  # One group per person (weight and vote are constant within a person)
  group_by(pidp, h_indinub_xw, h_vote8) %>%
  summarise(
    drive = if (1 %in% drive) 1 else if (2 %in% drive) 2 else NA_real_
  ) %>%
  ungroup() %>%
  # Drop people with no valid licence answer in any wave
  filter(!is.na(drive)) %>%
  count(drive, h_vote8, wt = h_indinub_xw) %>%
  # Recode driving licence from codes to names
  mutate(drive = c("Yes", "No")[drive]) %>%
  # Drop missing GE vote data
  filter(!h_vote8 %in% c("inapplicable", "refusal"))

# Table 1: driving licence by vote (shares sum to 100 within each party) ----
licence_vote_counts %>%
  group_by(h_vote8) %>%
  # Convert weighted numbers to shares
  mutate(share = n / sum(n) * 100) %>%
  # Drop parties whose weighted total gives an undefined share
  filter(is.finite(share)) %>%
  # Filter out parties with tiny (weighted) samples
  filter(sum(n) > 96) %>%
  # Carry the party total so spreading yields one row per party
  mutate(n = sum(n)) %>%
  ungroup() %>%
  # Spread wide again: one column per licence answer
  spread(drive, share) %>%
  # Sort by % with a licence
  arrange(desc(Yes))

# Table 2: vote by driving licence (shares sum to 100 within each licence
# answer); opened in the interactive data viewer ----
licence_vote_counts %>%
  group_by(drive) %>%
  mutate(share = n / sum(n) * 100) %>%
  ungroup() %>%
  View()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment