Skip to content

Instantly share code, notes, and snippets.

@jrnold
Created February 26, 2017 23:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jrnold/474f4f5f7882bc32b4700cd3e7a795c8 to your computer and use it in GitHub Desktop.
Save jrnold/474f4f5f7882bc32b4700cd3e7a795c8 to your computer and use it in GitHub Desktop.
Convert Missing Values for the European Social Survey
# Put both labels and values of the types of ESS missing values in a
# list to re-use later.
ESS_MISS <- list(
not_applicable = list(pattern = "Not +.p+lic+able", value = 6),
refused = list(pattern = "Refus..", value = 7),
not_available = list(pattern = "(No .nswer|Not .vailable)",
value = 8),
dont_know = list(pattern = "Don.?t .now", value = 9)
)
ess_missings <-
function(x, missings = c("not_applicable", "refused",
"not_available", "dont_know")) {
miss <- ESS_MISS[missings]
miss_pattern <- str_c("(", map_chr(miss, "pattern"), ")", collapse = "|")
if (is.factor(x)) {
# If factor, drop old levels
lvls <- levels(x)
new_lvls <- lvls[!str_detect(names(lvls), miss_pattern)]
factor(x, levels = new_lvls)
} else if (is.character(x)) {
# If character, set values to missing
x[str_detect(x, miss_pattern)] <- NA
} else if (is.labelled(x)) {
# If haven::labelled, then set values to missing
# And adjust labels
lbls <- attr(x, "labels")
na_lbls <- lbls[str_detect(names(lbls), miss_pattern)]
new_lbls <- lbls[set_diff(names(lbls), names(na_lbls))]
x[x %in% vals] <- NA
x <- labelled(x, new_lbls)
} else if (is.numeric(x)) {
# If numeric value, then set values to missing
# 1 = <10, 2 <100, 3 < 1000
dig <- ceiling(log10(max(x, na.rm = TRUE)))
# the rep() turns 6 to 6, 66, or 666 depending on the digits, etc.
x[x %in% map_dbl(miss, ~ rep(.x[["value"]], dig))] <- NA
} else {
stop(x, " is an unsupported type: ", class(x), call. = FALSE)
}
x
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment