Skip to content

Instantly share code, notes, and snippets.

@nutterb
Last active March 7, 2023 21:47
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save nutterb/501c370418abb58bee78 to your computer and use it in GitHub Desktop.
Save nutterb/501c370418abb58bee78 to your computer and use it in GitHub Desktop.
Generate a Report of Fields with Missing Values in a REDCap Database Using the redcapAPI package
#### Intended for use with the `redcapAPI` package
#' @name missingSummary
#' @aliases missingSummary.redcapApiConnection
#' @aliases missingSummary.redcapDbConneciton
#' @aliases missingSummary_offline
#' @export missingSummary
#' @export missingSummary.redcapApiConnection
#' @export missingSummary.redcapDbConnection
#' @export missingSummary_offline
#'
#' @title Report of Missing Values
#' @description Returns a data frame of subject events with missing values.
#'
#' @param rcon A recapConnection object.
#' @param proj A redcapProjectInfo object.
#' @param batch.size Batch size parameter for \code{exportRecords}
#' @param records a filename pointing to the raw records download from REDCap
#' @param meta_data a filename pointing to the data dictionary download from REDCap
#' @param excludeMissingForms If all of the fields in a form are missing, would
#' you like to assume that they are purposefully missing? For instance, if
#' a patient did not experience an adverse event, the adverse event form would
#' contain no data and you would not want it in this report.
#' @param ... Additional arguments to pass to other methods. Currently ignored.
#'
#' @details The intention of this function is to generate a list of subject
#' events that are missing and could potentially be values that should have
#' been entered.
#'
#' The branching logic from the data dictionary is parsed and translated into
#' and R expression. When a field with branching logic passes the logical
#' statement, it is evaluated with \code{is.na}, otherwise, it is set to
#' \code{FALSE} (non-missing, because there was never an opportunity to
#' provide a value).
#'
#' Optionally, forms that are entirely missing can be determined to be
#' non-missing. This is applicable when, for instance, a patient did not
#' have an adverse event. In this case, a form dedicated to adverse events
#' would contain meaningless missing values and could be excluded from the
#' report.
#'
#' @author Benjamin Nutter
#'
missingSummary <- function(rcon, excludeMissingForms=TRUE, ...){
UseMethod("missingSummary")
}
#' @rdname missingSummary
missingSummary.redcapDbConnection <- function(rcon,
excludeMissingForms=TRUE, ...){
message("Please accept my apologies. The missingSummary method for redcapDbConnection objects\n",
"has not yet been written. Please consider using the API.")
}
#' @rdname missingSummary
missingSummary.redcapApiConnection <- function(rcon,
excludeMissingForms = TRUE,
...,
batch.size = -1,
bundle = getOption("redcap_bundle"),
error_handling = getOption("redcap_error_handling")){
coll <- checkmate::makeAssertCollection()
checkmate::assert_class(x = rcon,
classes = "redcapApiConnection",
add = coll)
checkmate::assert_logical(x = excludeMissingForms,
len = 1,
add = coll)
checkmate::assert_integerish(x = batch.size,
len = 1,
add = coll)
error_handling <- checkmate::matchArg(x = error_handling,
choices = c("null", "error"),
add = coll)
checkmate::reportAssertions(coll)
# Import the records ----------------------------------------------
# records will be used to store the results of tests for missingness
# records_orig will be used to conduct the tests
records_orig <- exportRecords(rcon,
factors = FALSE,
labels = TRUE,
dates = FALSE,
survey = FALSE,
dag = TRUE,
batch.size = batch.size)
# Import the Meta Data --------------------------------------------
meta_data <- exportMetaData(rcon)
meta_data <- meta_data[meta_data$field_type != "descriptive", ]
logic <- parseBranchingLogic(meta_data$branching_logic)
names(logic) <- meta_data$field_name
records <- .missingSummary_isMissingInField(records_orig,
meta_data,
logic)
if (excludeMissingForms){
records <- .missingSummary_excludeMissingForm(records,
meta_data,
logic)
}
.missingSummary_makeResultFrame(records,
meta_data)
}
#' @rdname missingSummary
#' @export
missingSummary_offline <- function(records,
meta_data,
excludeMissingForms = TRUE){
coll <- checkmate::makeAssertCollection()
checkmate::assert_file_exists(x = records,
add = coll)
checkmate::assert_file_exists(meta_data,
add = coll)
checkmate::assert_logical(x = excludeMissingForms,
len = 1,
add = coll)
records_orig <- read.csv(records,
stringsAsFactors=FALSE,
na.string="")
meta_data <- read.csv(meta_data,
col.names=c('field_name', 'form_name', 'section_header',
'field_type', 'field_label', 'select_choices_or_calculations',
'field_note', 'text_validation_type_or_show_slider_number',
'text_validation_min', 'text_validation_max', 'identifier',
'branching_logic', 'required_field', 'custom_alignment',
'question_number', 'matrix_group_name', 'matrix_ranking',
'field_annotation'),
stringsAsFactors=FALSE)
meta_data <- meta_data[meta_data$field_type != "descriptive", ]
logic <- parseBranchingLogic(meta_data$branching_logic)
names(logic) <- meta_data$field_name
records <- .missingSummary_isMissingInField(records_orig,
meta_data,
logic)
if (excludeMissingForms){
records <- .missingSummary_excludeMissingForm(records,
meta_data,
logic)
}
.missingSummary_makeResultFrame(records,
meta_data)
}
# UNEXPORTED --------------------------------------------------------
.missingSummary_isMissingInField <- function(records_orig,
meta_data,
logic){
records <- records_orig
for (i in seq_along(records)){
# Actual field name
this_field <- names(records)[i]
# Remove checkbox suffixes. This allows logic to be matched to the field.
this_field_base <- sub("___.+$", "", this_field)
# get the logic expression for this iteration of the loop
this_logic <- logic[[this_field_base]]
# We are only going to look at fields that are informative as missing.
# we skip fixed fields (see unexported) and the ID variable.
if (!this_field %in% c(.missingSummary_fixedFields,
meta_data$field_name[1]) &
!is.null(this_logic)){
# get the name of the form on which the field is saved
tmp_form <- meta_data$form_name[meta_data$field_name ==
sub("___[[:print:]]", "", names(records)[i])]
tmp_form <- paste0(tmp_form, "_complete")
# NOTE: in the result, TRUE means the value is missing
# FALSE means the value is non-missing
if (tmp_form == "_complete"){
# If we are here, we didn't find a matching form name. We will
# assume variables not on a form are always non-missing.
records[[i]] <- rep(FALSE, nrow(records))
}
else if (!tmp_form %in% names(records)){
# If we are here, we are evaluating a `[form]_complete` field.
# We just want to know if it is missing or not.
records[[i]] <- is.na(records[[i]])
} else if (!is.expression(this_logic)) {
# If we are here, there is not branching logic.
# If the `[form]_complete` field is missing, we return FALSE
# If the `[form]_complete` is non-missing, we return the missingness of the value
records[[i]] <- ifelse(test = is.na(records_orig[[tmp_form]]),
yes = FALSE,
no = is.na(records_orig[[i]]))
}
else
# Here we have branching logic.
# If the `[form]_complete` field is missing, we return FALSE
# If the `[form]_complete` is non-missing:
# The branching logic is satisfied: return the missingness of the value
# The branchign logic is not satisfied: return FALSE
records[[i]] <- ifelse(test = is.na(records_orig[[tmp_form]]),
yes = FALSE,
no = ifelse(test = with(records_orig, eval(this_logic)),
yes = is.na(records_orig[[i]]),
no = FALSE))
}
}
records
}
.missingSummary_excludeMissingForm <- function(records,
meta_data,
logic){
# Get the `[form]_complete` fields.
form_names <- unique(meta_data$form_name)
form_complete_names <- paste0(form_names, "_complete")
for (i in seq_len(nrow(records))){
# For each record, find the fields associated with the forms
# where the `[form]_complete` field is missing.
completeFormMissing <- lapply(form_names,
function(f){
flds <- meta_data$field_name[meta_data$form_name %in% f]
flds <- flds[!flds %in% meta_data$field_name[1]]
flds <- flds[!flds %in% meta_data$field_name[meta_data$field_type == "checkbox"]]
if (length(flds) == 0){
return(NULL)
}
else if (all(unlist(records[i, flds, drop = FALSE]) | sapply(logic[flds], is.expression))){
return(flds)
}
else {
return(NULL)
}
})
# If the `[form]_complete` field is missing, we set the missingness value of the
# record for fields on that value to FALSE, indicating that they are non-missing
# That is, we don't consider a value missing unless the form is marked either 'Complete' or 'Incomplete'
completeFormMissing <- unlist(completeFormMissing)
if (!is.null(completeFormMissing)){
records[i, completeFormMissing] <- FALSE
}
}
records
}
.missingSummary_makeResultFrame <- function(records,
meta_data){
# These are the identifier fields in the result
start_field <- c(meta_data$field_name[1],
.missingSummary_fixedFields)
start_field <- start_field[start_field %in% names(records)]
# Make the initial data frame of results. Only the identifiers here
MissingSummary <- records[start_field]
# Remove the identifier fields from `records`.
# This makes it easier to run an apply statement on the rows
records <- records[!names(records) %in% start_field]
# Number of missing values
MissingSummary$n_missing <- numeric(nrow(records))
MissingSummary$missing <- character(nrow(records))
for (i in seq_len(nrow(MissingSummary))){
missing_this_row <- vapply(records[i, ],
FUN = isTRUE,
FUN.VALUE = logical(1))
MissingSummary$n_missing[i] <- sum(missing_this_row)
MissingSummary$missing[i] <- paste0(names(records)[missing_this_row],
collapse = ", ")
}
MissingSummary
}
# The field names listed here are those generated by REDCap. We are
# not interested in if they are missing and so will skip them.
# Their values can make useful labels, so we want to leave them
# untouched.
.missingSummary_fixedFields <- c("redcap_event_name",
"redcap_data_access_group",
"redcap_repeat_instrument",
"redcap_repeat_instance")
@FlitcroftWhite
Copy link

Hi Benjamin,

I'm getting the following error:
" Error in parse(text = x) : :1:39: unexpected input
1: investigation_arm_1mr_ctca == '1' & 90_
^ "

my code is as follows:

rcon<-redcapConnection(url="URL", token="API")
data<- missingSummary.redcapApiConnection(rcon)

I'm working with redcap v12.4.21

Many thanks,
Christopher

@spgarbet
Copy link

Wow!!! I just started an internal effort in our group to produce exactly this script. Love it. This should be an adjunct package to redcapAPI!!! Or even part of the package.

@kylerove
Copy link

kylerove commented Jan 24, 2023 via email

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment