Last active
February 28, 2020 11:57
-
-
Save cimentadaj/e62b674700173434e4d6f312b0133030 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(essurvey) | |
library(dplyr) | |
library(purrr) | |
## Reverse the substantive codes of a (labelled) numeric vector.
##
## This works for the UK, where PhD is coded as `1` whereas
## in other countries PhD is coded as the maximum level.
##
## Arguments:
##   x       numeric vector, optionally carrying a `labels` attribute
##           (a named vector of value labels).
##   na_vals codes that must keep their original value.
##
## Returns `x` where every value that is neither NA nor listed in
## `na_vals` is replaced by `abs(value - (max + 1))`, `max` being the
## largest such value found in `x`. When a `labels` attribute is present
## it is recoded with the same rule and re-sorted by value.
## If `x` holds no value to reverse, it is returned unchanged.
reverse_coding <- function(x, na_vals) {
  # Exclude all missing codes and NA
  filt <- !(x %in% na_vals) & !is.na(x)

  # Nothing to reverse: bail out early. Otherwise max() of an empty set
  # would be -Inf (with a warning) and the labels would be turned into Inf.
  if (!any(filt)) {
    return(x)
  }

  ## Find out the maximum + 1 to subtract that from the vector
  ## to reverse it
  max_val <- max(setdiff(x, na_vals), na.rm = TRUE) + 1

  # Reverse code the vector
  x[filt] <- abs(x[filt] - max_val)

  # Repeat the same recoding for the value labels, then sort them.
  # exact = TRUE avoids partial matching on other attribute names.
  attr_labels <- attr(x, "labels", exact = TRUE)
  if (!is.null(attr_labels)) {
    filt_attr <- !(attr_labels %in% na_vals) & !is.na(attr_labels)
    attr_labels[filt_attr] <- abs(attr_labels[filt_attr] - max_val)
    attr(x, "labels") <- sort(attr_labels, na.last = TRUE)
  }

  x
}
## Tabulate the distinct combinations of a country-specific education
## variable and `eisced`.
##
## Arguments:
##   df   data frame containing the column named by `var` and `eisced`.
##   var  name (string) of the country-specific education variable.
##   cnt  country name; stored in the `country` column of the result.
##
## Returns a data frame with columns `country`, `eduvar` (the values of
## `var`) and `eisced`, one row per observed combination.
unique_var <- function(df, var, cnt) {
  needs_reversal <- cnt == "United Kingdom"

  if (needs_reversal) {
    ## 5555 is other and 11 is None of these degrees
    ## we want to keep them with their initial coding
    keep_as_is <- c(5555, 11)
    df[[var]] <- reverse_coding(df[[var]], na_vals = keep_as_is)
  }

  var_sym <- sym(var)

  # count() yields one row per (var, eisced) pair; the counts themselves
  # are discarded in the final select().
  combos <- count(df, !!var_sym, eisced)
  combos <- mutate(combos, country = cnt, eduvar = !!var_sym)
  select(combos, country, eduvar, eisced, -n)
}
## Lookup table: country name -> its country-specific education variable
cnts <- c(
  "Austria"        = "edlveat",
  "Italy"          = "edlvdit",
  "Belgium"        = "edlvebe",
  "Netherlands"    = "edlvenl",
  "Switzerland"    = "edlvdch",
  "Spain"          = "edlvges",
  "United Kingdom" = "eduagb2"
)

## Download the ESS data (round 8)
set_email("cimentadaj@gmail.com")
ess_eight <- import_rounds(8)

## For every country, list its country-specific education variable
## next to the ISCED equivalent
res <- imap(cnts, function(edu_var, country) {
  unique_var(ess_eight, edu_var, country)
})

res
## Germany has three education groups:
## same variable but suffixed 1, 2, 3, and it is
## unclear which one to choose.
## The UK has the same layout; only one of its variables
## was used above, just to have an example.
gr <- import_country("Germany", 8)

select(gr, edubde1, eisced)

library(haven)

## Print, per country, the country name followed by one
## " - <value>. <label>" entry per value label of `eduvar`
walk(res, function(tbl) {
  cat("\n")
  cat(unique(tbl$country), sep = "\n")

  value_labels <- attr(tbl$eduvar, "labels")
  label_lines <- paste0(" ", "- ", value_labels, ". ", names(value_labels))
  cat(label_lines, sep = "\n")
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment