Last active
September 1, 2024 15:37
-
-
Save JanMarvin/db0d4081c70f3a1c6982140c174a5642 to your computer and use it in GitHub Desktop.
custom filter data table function for openxlsx2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' function to create the autofilter xml structure required for openxml | |
#' @param tabs a wbWorkbook wb$tables object | |
#' @param tab_name a table name in the workbook | |
#' @param conditions a named list with conditions to apply to the columns | |
prepare <- function(tabs, tab_name, conditions) { | |
if (!tab_name %in% tabs$tab_name) stop("Table with this tab_name not found.") | |
tabs <- tabs[tabs$tab_name == tab_name, , drop = FALSE] | |
vars <- openxlsx2:::rbindlist( | |
xml_attr(tabs$tab_xml, "table", "tableColumns", "tableColumn") | |
) | |
cond_vars <- names(conditions) | |
print(cond_vars) | |
print(vars$name) | |
if (any(!cond_vars %in% vars$name)) stop("Condtion variable not found in table.") | |
ref <- openxlsx2:::rbindlist(xml_attr(tabs$tab_xml, "table"))$ref | |
autoFilter <- xml_node_create("autoFilter", xml_attributes = c(ref = ref)) | |
vars$id <- as.integer(vars$id) - 1L | |
aF <- NULL | |
for (cond_var in cond_vars) { | |
colId <- vars$id[vars$name == cond_var] | |
condition <- conditions[[cond_var]] | |
## == default or %in% with multiple values | |
if (condition$operator == "equal") { | |
filter <- vapply( | |
condition$val, | |
function(x) { | |
xml_node_create("filter", xml_attributes = c(val = openxlsx2:::as_xml_attr(x))) | |
}, | |
FUN.VALUE = NA_character_ | |
) | |
filters <- xml_node_create("filters") | |
filterColumn <- xml_node_create("filterColumn", xml_attributes = c(colId = openxlsx2:::as_xml_attr(colId))) | |
filters <- xml_add_child(filters, filter) | |
filterColumn <- xml_add_child(filterColumn, filters) | |
} | |
## != notEqual, < lessThan, <= lessThanOrEqual, > greaterThan, >= greaterThanOrEqual | |
## TODO provide replacement function | |
ops <- c("notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual") | |
if (condition$operator %in% ops) { | |
customFilter <- vapply( | |
condition$val, | |
function(x) { | |
xml_node_create( | |
"customFilter", | |
xml_attributes = c(operator = condition$operator, | |
val = openxlsx2:::as_xml_attr(x)) | |
) | |
}, | |
FUN.VALUE = NA_character_ | |
) | |
customFilters <- xml_node_create("customFilters") | |
filterColumn <- xml_node_create("filterColumn", xml_attributes = c(colId = openxlsx2:::as_xml_attr(colId))) | |
customFilters <- xml_add_child(customFilters, customFilter) | |
filterColumn <- xml_add_child(filterColumn, customFilters) | |
} | |
aF <- c(aF, filterColumn) | |
} | |
autoFilter <- xml_add_child(autoFilter, aF) | |
autoFilter | |
} | |
#' helper to parse the string into something that can be converted into openxml | |
#' column filters | |
#' @param filter_expr an expression like "x$cyl != 4" | |
create_conditions <- function(filter_expr) { | |
conditions <- list() | |
# Define a helper function to add conditions to the list | |
add_condition <- function(operator, value) { | |
conditions[[value$nams]] <<- list(val = value$vals, operator = operator) | |
} | |
get_vals <- function(fltr_xpr, operator) { | |
args <- strsplit(fltr_xpr, operator)[[1]] | |
list( | |
nams = gsub("`", "", gsub("^x\\$", "", trimws(args[1]))), | |
vals = eval(parse(text = trimws(args[2]))) | |
) | |
} | |
# should only have & | |
fltr_xprs <- strsplit(filter_expr, "&")[[1]] | |
for (fltr_xpr in fltr_xprs) { | |
# Extract conditions for "equal" | |
if (grepl("%in%", fltr_xpr)) { | |
values <- get_vals(fltr_xpr, "%in%") | |
add_condition("equal", values) | |
} | |
# Extract conditions for "equal" using == | |
if (grepl("==", fltr_xpr)) { | |
values <- get_vals(fltr_xpr, "==") | |
add_condition("equal", values) | |
} | |
# Extract conditions for "notEqual" | |
if (grepl("!=", fltr_xpr)) { | |
values <- get_vals(fltr_xpr, "!=") | |
add_condition("notEqual", values) | |
} | |
# Extract conditions for "lessThan" | |
if (grepl("<[^=]", fltr_xpr)) { # "<" but not "<=" | |
values <- get_vals(fltr_xpr, "<") | |
add_condition("lessThan", values) | |
} | |
# Extract conditions for "lessThanOrEqual" | |
if (grepl("<=", fltr_xpr)) { | |
values <- get_vals(fltr_xpr, "<=") | |
add_condition("lessThanOrEqual", values) | |
} | |
# Extract conditions for "greaterThan" | |
if (grepl(">[^=]", fltr_xpr)) { # ">" but not ">=" | |
values <- get_vals(fltr_xpr, ">") | |
add_condition("greaterThan", values) | |
} | |
# Extract conditions for "greaterThanOrEqual" | |
if (grepl(">=", fltr_xpr)) { | |
values <- get_vals(fltr_xpr, ">=") | |
add_condition("greaterThanOrEqual", values) | |
} | |
} | |
conditions | |
} | |
#' get rows to hide (in _all but not in _sel) | |
#' @param wb a wbWorkbook | |
#' @param tab_len an index for a table object in the workbook | |
#' @param filter the filter as character string | |
rows_to_hide <- function(wb, tab_len, filter) { | |
x <- wb_to_df(wb, sheet = wb$tables$tab_sheet[tab_len], dims = wb$tables$tab_ref[tab_len]) | |
rows_all <- rownames(x) | |
x <- x[eval(parse(text = filter)), ] | |
rows_sel <- rownames(x) | |
out <- rows_all[!rows_all %in% rows_sel] | |
as.integer(out) | |
} | |
escape_varname <- function(var_name) { | |
vapply(var_name, function(x) { | |
ifelse(grepl("\\s", x), sprintf("`%s`", x), x) | |
}, NA_character_) | |
} | |
#' function to apply data table filter | |
#' @param choose a base R statement to filter an object "x$cyl == 4" | |
#' @inheritParams openxlsx2::wb_add_data_table | |
wb_filter_data_table <- function(wb, sheet = current_sheet(), x, dims = "A1", choose = NULL, ...) { | |
openxlsx2:::assert_workbook(wb) | |
## add the data table object | |
wb$add_data_table(wb, sheet = sheet, x = x, dims = dims, ...) | |
if (!is.null(choose)) { | |
nms <- names(choose) | |
nms <- paste0("x$", escape_varname(nms)) | |
filter <- vapply(seq_along(nms), function(i) { | |
gsub("x", replacement = nms[i], x = choose[i])}, | |
NA_character_) | |
print(filter) | |
openxlsx2:::assert_class(filter, "character") | |
## assume that the table created is the last table added to the workbook | |
tab_len <- length(wb$tables$tab_xml) | |
### prepare condition list & autofilter xml | |
fltr <- create_conditions(filter) | |
new_autofilter <- prepare(tabs = wb$tables, tab_name = wb$tables$tab_name[tab_len], conditions = fltr) | |
sel <- rows_to_hide(wb, tab_len, filter) | |
# hide rows | |
wb$set_row_heights(rows = sel, hidden = TRUE) | |
# add auto filter to table | |
wb$tables$tab_xml[tab_len] <- gsub( | |
pattern = xml_node(wb$tables$tab_xml[tab_len], "table", "autoFilter"), | |
replacement = new_autofilter, | |
x = wb$tables$tab_xml[tab_len] | |
) | |
} | |
invisible(wb) | |
} | |
library(openxlsx2) | |
### example 1 | |
wb <- wb_workbook() %>% | |
wb_add_worksheet() %>% | |
wb_filter_data_table(x = mtcars, choose = c(cyl = "x %in% c(4, 8)", am = c("x != 1"))) | |
if (interactive()) wb$open() | |
### example 2 | |
df <- esoph | |
names(df) <- c("Age group", "Alcohol consumption", "Tobacco consumption", "Number of cases", "Number of controls") | |
wb <- wb_workbook()$add_worksheet() | |
wb <- wb_filter_data_table(wb, x = df, choose = c(`Age group` = "x >'25-34'")) | |
if (interactive()) wb$open() | |
### example 3 | |
df <- ggplot2::economics | |
wb <- wb_workbook()$add_worksheet() | |
wb <- wb_filter_data_table(wb, x = df, choose = c(date = "x > as.Date('2010-01-01')")) | |
if (interactive()) wb$open() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment