-
-
Save nutterb/501c370418abb58bee78 to your computer and use it in GitHub Desktop.
#### Intended for use with the `redcapAPI` package | |
#' @name missingSummary | |
#' @aliases missingSummary.redcapApiConnection | |
#' @aliases missingSummary.redcapDbConneciton | |
#' @aliases missingSummary_offline | |
#' @export missingSummary | |
#' @export missingSummary.redcapApiConnection | |
#' @export missingSummary.redcapDbConnection | |
#' @export missingSummary_offline | |
#' | |
#' @title Report of Missing Values | |
#' @description Returns a data frame of subject events with missing values. | |
#' | |
#' @param rcon A recapConnection object. | |
#' @param proj A redcapProjectInfo object. | |
#' @param batch.size Batch size parameter for \code{exportRecords} | |
#' @param records a filename pointing to the raw records download from REDCap | |
#' @param meta_data a filename pointing to the data dictionary download from REDCap | |
#' @param excludeMissingForms If all of the fields in a form are missing, would | |
#' you like to assume that they are purposefully missing? For instance, if | |
#' a patient did not experience an adverse event, the adverse event form would | |
#' contain no data and you would not want it in this report. | |
#' @param ... Additional arguments to pass to other methods. Currently ignored. | |
#' | |
#' @details The intention of this function is to generate a list of subject | |
#' events that are missing and could potentially be values that should have | |
#' been entered. | |
#' | |
#' The branching logic from the data dictionary is parsed and translated into | |
#' and R expression. When a field with branching logic passes the logical | |
#' statement, it is evaluated with \code{is.na}, otherwise, it is set to | |
#' \code{FALSE} (non-missing, because there was never an opportunity to | |
#' provide a value). | |
#' | |
#' Optionally, forms that are entirely missing can be determined to be | |
#' non-missing. This is applicable when, for instance, a patient did not | |
#' have an adverse event. In this case, a form dedicated to adverse events | |
#' would contain meaningless missing values and could be excluded from the | |
#' report. | |
#' | |
#' @author Benjamin Nutter | |
#' | |
missingSummary <- function(rcon, excludeMissingForms=TRUE, ...){ | |
UseMethod("missingSummary") | |
} | |
#' @rdname missingSummary | |
missingSummary.redcapDbConnection <- function(rcon, | |
excludeMissingForms=TRUE, ...){ | |
message("Please accept my apologies. The missingSummary method for redcapDbConnection objects\n", | |
"has not yet been written. Please consider using the API.") | |
} | |
#' @rdname missingSummary | |
missingSummary.redcapApiConnection <- function(rcon, | |
excludeMissingForms = TRUE, | |
..., | |
batch.size = -1, | |
bundle = getOption("redcap_bundle"), | |
error_handling = getOption("redcap_error_handling")){ | |
coll <- checkmate::makeAssertCollection() | |
checkmate::assert_class(x = rcon, | |
classes = "redcapApiConnection", | |
add = coll) | |
checkmate::assert_logical(x = excludeMissingForms, | |
len = 1, | |
add = coll) | |
checkmate::assert_integerish(x = batch.size, | |
len = 1, | |
add = coll) | |
error_handling <- checkmate::matchArg(x = error_handling, | |
choices = c("null", "error"), | |
add = coll) | |
checkmate::reportAssertions(coll) | |
# Import the records ---------------------------------------------- | |
# records will be used to store the results of tests for missingness | |
# records_orig will be used to conduct the tests | |
records_orig <- exportRecords(rcon, | |
factors = FALSE, | |
labels = TRUE, | |
dates = FALSE, | |
survey = FALSE, | |
dag = TRUE, | |
batch.size = batch.size) | |
# Import the Meta Data -------------------------------------------- | |
meta_data <- exportMetaData(rcon) | |
meta_data <- meta_data[meta_data$field_type != "descriptive", ] | |
logic <- parseBranchingLogic(meta_data$branching_logic) | |
names(logic) <- meta_data$field_name | |
records <- .missingSummary_isMissingInField(records_orig, | |
meta_data, | |
logic) | |
if (excludeMissingForms){ | |
records <- .missingSummary_excludeMissingForm(records, | |
meta_data, | |
logic) | |
} | |
.missingSummary_makeResultFrame(records, | |
meta_data) | |
} | |
#' @rdname missingSummary | |
#' @export | |
missingSummary_offline <- function(records, | |
meta_data, | |
excludeMissingForms = TRUE){ | |
coll <- checkmate::makeAssertCollection() | |
checkmate::assert_file_exists(x = records, | |
add = coll) | |
checkmate::assert_file_exists(meta_data, | |
add = coll) | |
checkmate::assert_logical(x = excludeMissingForms, | |
len = 1, | |
add = coll) | |
records_orig <- read.csv(records, | |
stringsAsFactors=FALSE, | |
na.string="") | |
meta_data <- read.csv(meta_data, | |
col.names=c('field_name', 'form_name', 'section_header', | |
'field_type', 'field_label', 'select_choices_or_calculations', | |
'field_note', 'text_validation_type_or_show_slider_number', | |
'text_validation_min', 'text_validation_max', 'identifier', | |
'branching_logic', 'required_field', 'custom_alignment', | |
'question_number', 'matrix_group_name', 'matrix_ranking', | |
'field_annotation'), | |
stringsAsFactors=FALSE) | |
meta_data <- meta_data[meta_data$field_type != "descriptive", ] | |
logic <- parseBranchingLogic(meta_data$branching_logic) | |
names(logic) <- meta_data$field_name | |
records <- .missingSummary_isMissingInField(records_orig, | |
meta_data, | |
logic) | |
if (excludeMissingForms){ | |
records <- .missingSummary_excludeMissingForm(records, | |
meta_data, | |
logic) | |
} | |
.missingSummary_makeResultFrame(records, | |
meta_data) | |
} | |
# UNEXPORTED -------------------------------------------------------- | |
.missingSummary_isMissingInField <- function(records_orig, | |
meta_data, | |
logic){ | |
records <- records_orig | |
for (i in seq_along(records)){ | |
# Actual field name | |
this_field <- names(records)[i] | |
# Remove checkbox suffixes. This allows logic to be matched to the field. | |
this_field_base <- sub("___.+$", "", this_field) | |
# get the logic expression for this iteration of the loop | |
this_logic <- logic[[this_field_base]] | |
# We are only going to look at fields that are informative as missing. | |
# we skip fixed fields (see unexported) and the ID variable. | |
if (!this_field %in% c(.missingSummary_fixedFields, | |
meta_data$field_name[1]) & | |
!is.null(this_logic)){ | |
# get the name of the form on which the field is saved | |
tmp_form <- meta_data$form_name[meta_data$field_name == | |
sub("___[[:print:]]", "", names(records)[i])] | |
tmp_form <- paste0(tmp_form, "_complete") | |
# NOTE: in the result, TRUE means the value is missing | |
# FALSE means the value is non-missing | |
if (tmp_form == "_complete"){ | |
# If we are here, we didn't find a matching form name. We will | |
# assume variables not on a form are always non-missing. | |
records[[i]] <- rep(FALSE, nrow(records)) | |
} | |
else if (!tmp_form %in% names(records)){ | |
# If we are here, we are evaluating a `[form]_complete` field. | |
# We just want to know if it is missing or not. | |
records[[i]] <- is.na(records[[i]]) | |
} else if (!is.expression(this_logic)) { | |
# If we are here, there is not branching logic. | |
# If the `[form]_complete` field is missing, we return FALSE | |
# If the `[form]_complete` is non-missing, we return the missingness of the value | |
records[[i]] <- ifelse(test = is.na(records_orig[[tmp_form]]), | |
yes = FALSE, | |
no = is.na(records_orig[[i]])) | |
} | |
else | |
# Here we have branching logic. | |
# If the `[form]_complete` field is missing, we return FALSE | |
# If the `[form]_complete` is non-missing: | |
# The branching logic is satisfied: return the missingness of the value | |
# The branchign logic is not satisfied: return FALSE | |
records[[i]] <- ifelse(test = is.na(records_orig[[tmp_form]]), | |
yes = FALSE, | |
no = ifelse(test = with(records_orig, eval(this_logic)), | |
yes = is.na(records_orig[[i]]), | |
no = FALSE)) | |
} | |
} | |
records | |
} | |
.missingSummary_excludeMissingForm <- function(records, | |
meta_data, | |
logic){ | |
# Get the `[form]_complete` fields. | |
form_names <- unique(meta_data$form_name) | |
form_complete_names <- paste0(form_names, "_complete") | |
for (i in seq_len(nrow(records))){ | |
# For each record, find the fields associated with the forms | |
# where the `[form]_complete` field is missing. | |
completeFormMissing <- lapply(form_names, | |
function(f){ | |
flds <- meta_data$field_name[meta_data$form_name %in% f] | |
flds <- flds[!flds %in% meta_data$field_name[1]] | |
flds <- flds[!flds %in% meta_data$field_name[meta_data$field_type == "checkbox"]] | |
if (length(flds) == 0){ | |
return(NULL) | |
} | |
else if (all(unlist(records[i, flds, drop = FALSE]) | sapply(logic[flds], is.expression))){ | |
return(flds) | |
} | |
else { | |
return(NULL) | |
} | |
}) | |
# If the `[form]_complete` field is missing, we set the missingness value of the | |
# record for fields on that value to FALSE, indicating that they are non-missing | |
# That is, we don't consider a value missing unless the form is marked either 'Complete' or 'Incomplete' | |
completeFormMissing <- unlist(completeFormMissing) | |
if (!is.null(completeFormMissing)){ | |
records[i, completeFormMissing] <- FALSE | |
} | |
} | |
records | |
} | |
.missingSummary_makeResultFrame <- function(records, | |
meta_data){ | |
# These are the identifier fields in the result | |
start_field <- c(meta_data$field_name[1], | |
.missingSummary_fixedFields) | |
start_field <- start_field[start_field %in% names(records)] | |
# Make the initial data frame of results. Only the identifiers here | |
MissingSummary <- records[start_field] | |
# Remove the identifier fields from `records`. | |
# This makes it easier to run an apply statement on the rows | |
records <- records[!names(records) %in% start_field] | |
# Number of missing values | |
MissingSummary$n_missing <- numeric(nrow(records)) | |
MissingSummary$missing <- character(nrow(records)) | |
for (i in seq_len(nrow(MissingSummary))){ | |
missing_this_row <- vapply(records[i, ], | |
FUN = isTRUE, | |
FUN.VALUE = logical(1)) | |
MissingSummary$n_missing[i] <- sum(missing_this_row) | |
MissingSummary$missing[i] <- paste0(names(records)[missing_this_row], | |
collapse = ", ") | |
} | |
MissingSummary | |
} | |
# The field names listed here are those generated by REDCap. We are | |
# not interested in if they are missing and so will skip them. | |
# Their values can make useful labels, so we want to leave them | |
# untouched. | |
.missingSummary_fixedFields <- c("redcap_event_name", | |
"redcap_data_access_group", | |
"redcap_repeat_instrument", | |
"redcap_repeat_instance") |
Since It's not possible to request a pull for a gist a paste my code suggestion
records[[i]] <- ifelse(is.na(records[[tmp_form]]),
FALSE,
ifelse(with(records, eval(l)) %in% c(NA,F),
FALSE,
is.na(records[[i]])
)
)
This deal with some NAs
Hello, I have a relatively large project on REDCap which has multiple instruments and repeat instances. I set the up the script using : rcon <- redcapConnection(url='xxx', token='xxx') then proceeded to run the script above but unfortunately nothing was produced despite the entire script running with no errors.
I know it is connecting to the server fine as when I run individual lines of code, I can still extract data (e.g. below):
records <- exportRecords(rcon, factors=FALSE, labels=TRUE,
dates=FALSE, survey=FALSE, dag=TRUE,
batch.size=batch.size)
Any suggestions would be greatly appreciated!!!!
Hi Benjamin,
I'm getting the following error:
" Error in parse(text = x) : :1:39: unexpected input
1: investigation_arm_1mr_ctca == '1' & 90_
^ "
my code is as follows:
rcon<-redcapConnection(url="URL", token="API")
data<- missingSummary.redcapApiConnection(rcon)
I'm working with redcap v12.4.21
Many thanks,
Christopher
Wow!!! I just started an internal effort in our group to produce exactly this script. Love it. This should be an adjunct package to redcapAPI!!! Or even part of the package.
I fixed the branching logic parser by adding
l <- gsub("[<] [=]", " <", l)
l <- gsub("[>] [=]", " >", l)
I noticed also that for checkbox options going beyond 9, the variable names will look like "var___10, var___11" etc
the sub("[[:print:]]) will match anything var___x to var in meta field name but it will not match a var___xx to var in meta field names due to the extra numeric. I added an asterisk "[[:print:]]*" to fix this. My apologies I am new to github and do not know how to write edits or contribute.