## cimentadaj/education_codebook.R

## education_codebook.R
library(essurvey)
library(dplyr)
library(purrr)

## Reverse the coding of a (optionally labelled) numeric vector.
##
## This works for the UK, where PhD is coded as `1` whereas
## in other countries PhD is coded as the maximum level.
##
## @param x Numeric vector; may carry a `labels` attribute (a named
##   numeric vector of value labels).
## @param na_vals Codes that must keep their original value, both in
##   the data and in the labels.
## @return `x` where every value `v` that is neither `NA` nor in
##   `na_vals` is replaced by `abs(v - (max + 1))`, `max` being the
##   largest such value found in `x`. When a `labels` attribute is
##   present it is recoded the same way and sorted, `NA`s last.
##   `x` is returned untouched when it has no value to reverse.
reverse_coding <- function(x, na_vals) {
  # Values that take part in the reversal: no special codes, no NA
  valid_vals <- setdiff(x, na_vals)
  valid_vals <- valid_vals[!is.na(valid_vals)]

  # Nothing to reverse. Bailing out here also avoids max() warning
  # and returning -Inf on an empty set.
  if (length(valid_vals) == 0) {
    return(x)
  }

  # The maximum + 1 is the pivot the codes are mirrored around
  max_val <- max(valid_vals) + 1

  # Exclude all missing codes and NA
  filt <- !(x %in% na_vals) & !is.na(x)

  # Reverse code the vector
  x[filt] <- abs(x[filt] - max_val)

  # Repeat similarly for the labels + sorting, but only when the
  # vector actually carries labels
  attr_labels <- attr(x, "labels", exact = TRUE)
  if (!is.null(attr_labels)) {
    filt_attr <- !(attr_labels %in% na_vals) & !is.na(attr_labels)
    attr_labels[filt_attr] <- abs(attr_labels[filt_attr] - max_val)
    attr(x, "labels") <- sort(attr_labels, na.last = TRUE)
  }

  x
}

## Tabulate the distinct combinations of a country-specific
## education variable and `eisced`.
##
## @param df Data frame holding `eisced` and the column named by `var`.
## @param var Name (string) of the country-specific education column.
## @param cnt Country name; copied into the `country` column.
## @return A data frame with the columns `country`, `eduvar` and
##   `eisced`, one row per combination produced by `count()`.
unique_var <- function(df, var, cnt) {
  edu_col <- sym(var)

  ## The UK variable is coded in the opposite direction, so it is
  ## reversed first. 5555 is other and 11 is None of these degrees;
  ## both keep their initial coding.
  if (cnt == "United Kingdom") {
    df[[var]] <- reverse_coding(df[[var]], na_vals = c(5555, 11))
  }

  combos <- count(df, !!edu_col, eisced)
  combos <- mutate(combos, country = cnt, eduvar = !!edu_col)

  ## The count itself is not needed, only the combinations
  select(combos, country, eduvar, eisced, -n)
}


## Lookup table: names are countries, values are the matching
## country-specific education variables
cnts <- setNames(
  c(
    "edlveat", "edlvdit", "edlvebe", "edlvenl",
    "edlvdch", "edlvges", "eduagb2"
  ),
  c(
    "Austria", "Italy", "Belgium", "Netherlands",
    "Switzerland", "Spain", "United Kingdom"
  )
)

## Register the ESS email, then download the ESS data (round 8)
set_email("cimentadaj@gmail.com")
ess_eight <- import_rounds(8)

## For each country, list its country-specific education variable
## next to the ISCED equivalent. `imap()` hands over the element
## (variable name) first and its name (country) second.
res <- imap(cnts, function(edu_var, country) {
  unique_var(ess_eight, edu_var, country)
})

res

## Germany has three education groups: the same variable
## suffixed 1, 2 and 3, and it is unclear which one to choose.
## The UK is in the same situation; only one of its variables
## was used above, just to have an example.
gr <- import_country("Germany", 8)
select(gr, edubde1, eisced)

library(haven)

## Print, for every country table, the country name followed by one
## `- value. label` line per entry of the `labels` attribute of `eduvar`
walk(res, function(tbl) {
  cat("\n")
  cat(unique(tbl$country), sep = "\n")

  value_labels <- attr(tbl$eduvar, "labels")
  label_lines <- paste0(
    "     ", "- ", value_labels, ". ", names(value_labels)
  )
  cat(label_lines, sep = "\n")
})
	library(essurvey)
	library(dplyr)
	library(purrr)

	## Reverse the coding of a (optionally labelled) numeric vector.
	##
	## This works for the UK, where PhD is coded as `1` whereas
	## in other countries PhD is coded as the maximum level.
	##
	## @param x Numeric vector; may carry a `labels` attribute (a named
	##   numeric vector of value labels).
	## @param na_vals Codes that must keep their original value, both in
	##   the data and in the labels.
	## @return `x` where every value `v` that is neither `NA` nor in
	##   `na_vals` is replaced by `abs(v - (max + 1))`, `max` being the
	##   largest such value found in `x`. When a `labels` attribute is
	##   present it is recoded the same way and sorted, `NA`s last.
	##   `x` is returned untouched when it has no value to reverse.
	reverse_coding <- function(x, na_vals) {
	  # Values that take part in the reversal: no special codes, no NA
	  valid_vals <- setdiff(x, na_vals)
	  valid_vals <- valid_vals[!is.na(valid_vals)]

	  # Nothing to reverse. Bailing out here also avoids max() warning
	  # and returning -Inf on an empty set.
	  if (length(valid_vals) == 0) {
	    return(x)
	  }

	  # The maximum + 1 is the pivot the codes are mirrored around
	  max_val <- max(valid_vals) + 1

	  # Exclude all missing codes and NA
	  filt <- !(x %in% na_vals) & !is.na(x)

	  # Reverse code the vector
	  x[filt] <- abs(x[filt] - max_val)

	  # Repeat similarly for the labels + sorting, but only when the
	  # vector actually carries labels
	  attr_labels <- attr(x, "labels", exact = TRUE)
	  if (!is.null(attr_labels)) {
	    filt_attr <- !(attr_labels %in% na_vals) & !is.na(attr_labels)
	    attr_labels[filt_attr] <- abs(attr_labels[filt_attr] - max_val)
	    attr(x, "labels") <- sort(attr_labels, na.last = TRUE)
	  }

	  x
	}

	## Tabulate the distinct combinations of a country-specific
	## education variable and `eisced`.
	##
	## @param df Data frame holding `eisced` and the column named by `var`.
	## @param var Name (string) of the country-specific education column.
	## @param cnt Country name; copied into the `country` column.
	## @return A data frame with the columns `country`, `eduvar` and
	##   `eisced`, one row per combination produced by `count()`.
	unique_var <- function(df, var, cnt) {
	  edu_col <- sym(var)

	  ## The UK variable is coded in the opposite direction, so it is
	  ## reversed first. 5555 is other and 11 is None of these degrees;
	  ## both keep their initial coding.
	  if (cnt == "United Kingdom") {
	    df[[var]] <- reverse_coding(df[[var]], na_vals = c(5555, 11))
	  }

	  combos <- count(df, !!edu_col, eisced)
	  combos <- mutate(combos, country = cnt, eduvar = !!edu_col)

	  ## The count itself is not needed, only the combinations
	  select(combos, country, eduvar, eisced, -n)
	}


	## Lookup table: names are countries, values are the matching
	## country-specific education variables
	cnts <- setNames(
	  c(
	    "edlveat", "edlvdit", "edlvebe", "edlvenl",
	    "edlvdch", "edlvges", "eduagb2"
	  ),
	  c(
	    "Austria", "Italy", "Belgium", "Netherlands",
	    "Switzerland", "Spain", "United Kingdom"
	  )
	)

	## Register the ESS email, then download the ESS data (round 8)
	set_email("cimentadaj@gmail.com")
	ess_eight <- import_rounds(8)

	## For each country, list its country-specific education variable
	## next to the ISCED equivalent. `imap()` hands over the element
	## (variable name) first and its name (country) second.
	res <- imap(cnts, function(edu_var, country) {
	  unique_var(ess_eight, edu_var, country)
	})

	res

	## Germany has three education groups: the same variable
	## suffixed 1, 2 and 3, and it is unclear which one to choose.
	## The UK is in the same situation; only one of its variables
	## was used above, just to have an example.
	gr <- import_country("Germany", 8)
	select(gr, edubde1, eisced)

	library(haven)

	## Print, for every country table, the country name followed by one
	## `- value. label` line per entry of the `labels` attribute of `eduvar`
	walk(res, function(tbl) {
	  cat("\n")
	  cat(unique(tbl$country), sep = "\n")

	  value_labels <- attr(tbl$eduvar, "labels")
	  label_lines <- paste0(
	    " ", "- ", value_labels, ". ", names(value_labels)
	  )
	  cat(label_lines, sep = "\n")
	})