Skip to content

Instantly share code, notes, and snippets.

@pgstevenson
Last active May 11, 2021 06:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pgstevenson/c1402911793924487557386e238de2f9 to your computer and use it in GitHub Desktop.
Save pgstevenson/c1402911793924487557386e238de2f9 to your computer and use it in GitHub Desktop.
Helper functions to assist with REDCap data cleaning
# Parse a choices string into a two-column lookup table.
#
# x: a single string of the form "key1, label1 | key2, label2 | ...".
#
# Returns a tibble with character columns `key` and `value`, one row per
# "|"-separated entry. Each entry is split at its first comma only
# (extra = "merge"), so commas inside a label are kept. Surrounding
# whitespace is trimmed from both columns.
factor_table <- function(x) {
  # Only the first element of `x` is parsed.
  choices <- str_split(x, "\\|")[[1]]
  parsed <- separate(
    tibble(key = choices),
    key,
    into = c("key", "value"),
    sep = ",",
    extra = "merge"
  )
  mutate(parsed, key = trimws(key), value = trimws(value))
}
# Convert a coded column to a factor using the choices listed for it in the
# data dictionary.
#
# x:    the column values.
# d:    not used by this function; kept as part of the signature.
# dict: data dictionary with columns `Variable / Field Name` and
#       `Choices, Calculations, OR Slider Labels`.
#
# Must be called from inside dplyr::across(), because the dictionary row is
# looked up by the name returned from cur_column().
#
# Returns a factor whose levels are the choice keys and whose labels are the
# matching choice values.
factor_convert <- function(x, d, dict) {
  is_current <- dict$`Variable / Field Name` == cur_column()
  choices <- factor_table(
    dict[is_current, ]$`Choices, Calculations, OR Slider Labels`
  )
  factor(x, levels = choices$key, labels = choices$value)
}
# Build variable labels for the expanded columns of one checkbox field.
#
# x:    name of the checkbox field as it appears in the data dictionary.
# dict: data dictionary with columns `Variable / Field Name` and
#       `Choices, Calculations, OR Slider Labels`.
#
# Returns a named list with one element per choice: the name is
# "<x>___<key>" and the element is the choice label.
checkbox_labels <- function(x, dict) {
  is_field <- dict$`Variable / Field Name` == x
  choices <- factor_table(
    dict[is_field, ]$`Choices, Calculations, OR Slider Labels`
  )
  setNames(as.list(choices$value), paste(x, choices$key, sep = "___"))
}
# Look up a display label for every column of `d`.
#
# d:    data frame (or other named object) whose names are to be labelled.
# dict: data dictionary with columns `Variable / Field Name` and
#       `Field Label`.
#
# Returns an unnamed character vector, one element per column of `d` and in
# the same order. A column found in the dictionary gets the `Field Label` of
# its first matching row; any other column gets its own name.
variable_labels <- function(d, dict) {
  columns <- as.character(names(d))
  # match() is vectorised and, with incomparables = NA, never pairs a column
  # with a missing dictionary key (an `==` comparison would yield NA rows).
  row_index <- match(
    columns,
    dict$`Variable / Field Name`,
    incomparables = NA
  )
  found <- !is.na(row_index)
  labels <- columns
  labels[found] <- as.character(dict$`Field Label`[row_index[found]])
  labels
}
# Convert a raw export to typed, labelled columns using its data dictionary.
#
# d:    data frame of exported records. Checkbox fields are expected as one
#       column per choice, named "<field>___<code>".
# dict: data dictionary with columns `Variable / Field Name`, `Field Type`,
#       `Field Label`, `Text Validation Type OR Show Slider Number` and
#       `Choices, Calculations, OR Slider Labels`.
#
# Conversions applied:
#   * "number"-validated fields          -> numeric
#   * "yesno" fields                     -> numeric, then logical
#   * checkbox "<field>___<code>" columns -> logical
#   * "dropdown" / "radio" fields        -> factor (via factor_convert)
# Every column then gets a variable label (via variable_labels), and checkbox
# columns get their choice label (via checkbox_labels).
#
# Returns the converted data frame.
clean <- function(d, dict) {
  all_fields <- as.character(dict$`Variable / Field Name`)
  is_checkbox <- dict$`Field Type` %in% "checkbox"
  # A checkbox field never appears under its bare name in `d`; it is present
  # only through its "<field>___<code>" columns, so detect those explicitly.
  has_expanded_cols <- vapply(
    all_fields,
    function(f) !is.na(f) && any(startsWith(names(d), paste0(f, "___"))),
    logical(1),
    USE.NAMES = FALSE
  )
  # Drop dictionary rows that have no counterpart in the dataset.
  keep <- all_fields %in% names(d) | (is_checkbox & has_expanded_cols)
  dict <- dict[keep, ]

  fields <- dict$`Variable / Field Name`
  field_type <- dict$`Field Type`
  validation <- dict$`Text Validation Type OR Show Slider Number`
  # %in% is FALSE for NA, so rows with a missing type/validation are skipped.
  number_vars <- fields[validation %in% "number"]
  yesno_vars <- fields[field_type %in% "yesno"]
  choice_vars <- fields[field_type %in% c("dropdown", "radio")]
  checkbox_vars <- fields[field_type %in% "checkbox"]
  # paste0(character(0), "___") would give "___", so build an explicitly empty
  # prefix set when there are no checkbox fields.
  checkbox_prefixes <- if (length(checkbox_vars) > 0) {
    paste0(checkbox_vars, "___")
  } else {
    character(0)
  }

  # Defined outside mutate() so `d` and `dict` resolve to this function's
  # arguments; factor_convert still sees the current column via cur_column().
  to_factor <- function(column) factor_convert(column, d = d, dict = dict)

  converted <- mutate(
    d,
    across(any_of(c(number_vars, yesno_vars)), as.numeric),
    # yesno values go through numeric first: as.logical("1") is NA.
    across(any_of(yesno_vars), as.logical),
    across(starts_with(checkbox_prefixes), ~ as.logical(as.numeric(.x))),
    across(any_of(choice_vars), to_factor)
  )
  d <- set_variable_labels(
    converted,
    .labels = variable_labels(converted, dict)
  )

  # Apply checkbox labels, skipping choices whose column is not in the data.
  for (field_name in checkbox_vars) {
    choice_labels <- checkbox_labels(field_name, dict)
    choice_labels <- choice_labels[names(choice_labels) %in% names(d)]
    if (length(choice_labels) > 0) {
      d <- set_variable_labels(d, .labels = choice_labels)
    }
  }
  d
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment