# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @cimentadaj
# Last active February 28, 2020 11:57
# Show Gist options
#   • Save cimentadaj/e62b674700173434e4d6f312b0133030 to your computer and use it in GitHub Desktop.
# Save cimentadaj/e62b674700173434e4d6f312b0133030 to your computer and use it in GitHub Desktop.
library(essurvey)
library(dplyr)
library(purrr)
## Reverse the coding of a (labelled) numeric vector.
##
## This works for the UK, where PhD is coded as `1` whereas
## in other countries PhD is coded as the maximum level.
##
## x:       numeric vector, optionally carrying a named `labels`
##          attribute (presumably a haven-labelled column -- confirm
##          against the caller).
## na_vals: codes that must keep their original value (e.g. "other").
##
## Returns `x` where every value that is neither `NA` nor in `na_vals`
## is replaced by `(max + 1) - value`. When `x` has a `labels`
## attribute, the label codes are reversed the same way and re-sorted.
reverse_coding <- function(x, na_vals) {
  attr_labels <- attr(x, "labels", exact = TRUE)

  ## The pivot must cover the labelled codes as well as the observed
  ## ones. If the highest labelled code is never observed, a pivot
  ## taken from the data alone would map labels to 0 or make two
  ## labels collide on the same code.
  codes <- setdiff(x, na_vals)
  if (!is.null(attr_labels)) {
    codes <- c(codes, setdiff(attr_labels, na_vals))
  }
  codes <- codes[!is.na(codes)]

  ## Nothing to reverse (empty, all NA, or only excluded codes)
  if (length(codes) == 0) {
    return(x)
  }

  ## Maximum + 1: subtracting it from the vector reverses the scale
  max_val <- max(codes) + 1

  ## Exclude all missing codes and NA
  filt <- !(x %in% na_vals) & !is.na(x)

  ## Reverse code the vector
  x[filt] <- abs(x[filt] - max_val)

  ## Repeat the same reversal for the labels and sort them again.
  ## Unlabelled vectors are returned without a `labels` attribute.
  if (!is.null(attr_labels)) {
    filt_attr <- !(attr_labels %in% na_vals) & !is.na(attr_labels)
    attr_labels[filt_attr] <- abs(attr_labels[filt_attr] - max_val)
    attr(x, "labels") <- sort(attr_labels, na.last = TRUE)
  }

  x
}
## Tabulate how a country-specific education variable lines up
## with `eisced`.
##
## df:  data frame with the column named by `var` and an `eisced` column.
## var: name (string) of the country-specific education column.
## cnt: country name; copied into the `country` column of the result.
##
## Returns one row per distinct (education code, eisced) combination,
## with the columns `country`, `eduvar` and `eisced`.
unique_var <- function(df, var, cnt) {
  if (cnt == "United Kingdom") {
    ## Keep 5555 (other) and 11 (none of these degrees) at their
    ## initial coding; every other code is reversed.
    df[[var]] <- reverse_coding(df[[var]], na_vals = c(5555, 11))
  }

  edu_col <- sym(var)

  ## `count()` is only used to get the distinct combinations;
  ## the tally column is not part of the result.
  combos <- count(df, !!edu_col, eisced)
  combos <- mutate(combos, country = cnt, eduvar = !!edu_col)
  select(combos, country, eduvar, eisced)
}
## Lookup table: names are countries, values are the names of the
## country-specific education variables
cnts <- c(
  Austria          = "edlveat",
  Italy            = "edlvdit",
  Belgium          = "edlvebe",
  Netherlands      = "edlvenl",
  Switzerland      = "edlvdch",
  Spain            = "edlvges",
  `United Kingdom` = "eduagb2"
)
## Register the e-mail used for the ESS download and fetch round 8
set_email("cimentadaj@gmail.com")
ess_eight <- import_rounds(8)

## For every entry of `cnts`, tabulate the country-specific education
## variable against its ISCED equivalent; the result is a list named
## by country
res <- imap(
  cnts,
  function(edu_var, country) unique_var(ess_eight, edu_var, country)
)
res
## Germany has three education groups: the same variable, but
## suffixed 1, 2 and 3. It is not clear which one to choose.
## The UK is in the same situation; only one of its variables was
## used above, just to have an example.
gr <- import_country("Germany", 8)
select(gr, edubde1, eisced)
library(haven)
## For each country table: print a blank line, the country name, and
## then one " - <code>. <label>" line per value label of `eduvar`
walk(res, function(tbl) {
  cat("\n")
  cat(unique(tbl$country), sep = "\n")
  value_labels <- attr(tbl$eduvar, "labels")
  cat(paste0(" ", "- ", value_labels, ". ", names(value_labels)), sep = "\n")
})
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment