Skip to content

Instantly share code, notes, and snippets.

@JanMarvin
Last active September 1, 2024 15:37
Show Gist options
  • Save JanMarvin/db0d4081c70f3a1c6982140c174a5642 to your computer and use it in GitHub Desktop.
Save JanMarvin/db0d4081c70f3a1c6982140c174a5642 to your computer and use it in GitHub Desktop.
custom filter data table function for openxlsx2
#' function to create the autofilter xml structure required for openxml
#' @param tabs a wbWorkbook wb$tables object
#' @param tab_name a table name in the workbook
#' @param conditions a named list with conditions to apply to the columns
prepare <- function(tabs, tab_name, conditions) {
if (!tab_name %in% tabs$tab_name) stop("Table with this tab_name not found.")
tabs <- tabs[tabs$tab_name == tab_name, , drop = FALSE]
vars <- openxlsx2:::rbindlist(
xml_attr(tabs$tab_xml, "table", "tableColumns", "tableColumn")
)
cond_vars <- names(conditions)
print(cond_vars)
print(vars$name)
if (any(!cond_vars %in% vars$name)) stop("Condtion variable not found in table.")
ref <- openxlsx2:::rbindlist(xml_attr(tabs$tab_xml, "table"))$ref
autoFilter <- xml_node_create("autoFilter", xml_attributes = c(ref = ref))
vars$id <- as.integer(vars$id) - 1L
aF <- NULL
for (cond_var in cond_vars) {
colId <- vars$id[vars$name == cond_var]
condition <- conditions[[cond_var]]
## == default or %in% with multiple values
if (condition$operator == "equal") {
filter <- vapply(
condition$val,
function(x) {
xml_node_create("filter", xml_attributes = c(val = openxlsx2:::as_xml_attr(x)))
},
FUN.VALUE = NA_character_
)
filters <- xml_node_create("filters")
filterColumn <- xml_node_create("filterColumn", xml_attributes = c(colId = openxlsx2:::as_xml_attr(colId)))
filters <- xml_add_child(filters, filter)
filterColumn <- xml_add_child(filterColumn, filters)
}
## != notEqual, < lessThan, <= lessThanOrEqual, > greaterThan, >= greaterThanOrEqual
## TODO provide replacement function
ops <- c("notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual")
if (condition$operator %in% ops) {
customFilter <- vapply(
condition$val,
function(x) {
xml_node_create(
"customFilter",
xml_attributes = c(operator = condition$operator,
val = openxlsx2:::as_xml_attr(x))
)
},
FUN.VALUE = NA_character_
)
customFilters <- xml_node_create("customFilters")
filterColumn <- xml_node_create("filterColumn", xml_attributes = c(colId = openxlsx2:::as_xml_attr(colId)))
customFilters <- xml_add_child(customFilters, customFilter)
filterColumn <- xml_add_child(filterColumn, customFilters)
}
aF <- c(aF, filterColumn)
}
autoFilter <- xml_add_child(autoFilter, aF)
autoFilter
}
#' helper to parse the string into something that can be converted into openxml
#' column filters
#' @param filter_expr an expression like "x$cyl != 4"
create_conditions <- function(filter_expr) {
conditions <- list()
# Define a helper function to add conditions to the list
add_condition <- function(operator, value) {
conditions[[value$nams]] <<- list(val = value$vals, operator = operator)
}
get_vals <- function(fltr_xpr, operator) {
args <- strsplit(fltr_xpr, operator)[[1]]
list(
nams = gsub("`", "", gsub("^x\\$", "", trimws(args[1]))),
vals = eval(parse(text = trimws(args[2])))
)
}
# should only have &
fltr_xprs <- strsplit(filter_expr, "&")[[1]]
for (fltr_xpr in fltr_xprs) {
# Extract conditions for "equal"
if (grepl("%in%", fltr_xpr)) {
values <- get_vals(fltr_xpr, "%in%")
add_condition("equal", values)
}
# Extract conditions for "equal" using ==
if (grepl("==", fltr_xpr)) {
values <- get_vals(fltr_xpr, "==")
add_condition("equal", values)
}
# Extract conditions for "notEqual"
if (grepl("!=", fltr_xpr)) {
values <- get_vals(fltr_xpr, "!=")
add_condition("notEqual", values)
}
# Extract conditions for "lessThan"
if (grepl("<[^=]", fltr_xpr)) { # "<" but not "<="
values <- get_vals(fltr_xpr, "<")
add_condition("lessThan", values)
}
# Extract conditions for "lessThanOrEqual"
if (grepl("<=", fltr_xpr)) {
values <- get_vals(fltr_xpr, "<=")
add_condition("lessThanOrEqual", values)
}
# Extract conditions for "greaterThan"
if (grepl(">[^=]", fltr_xpr)) { # ">" but not ">="
values <- get_vals(fltr_xpr, ">")
add_condition("greaterThan", values)
}
# Extract conditions for "greaterThanOrEqual"
if (grepl(">=", fltr_xpr)) {
values <- get_vals(fltr_xpr, ">=")
add_condition("greaterThanOrEqual", values)
}
}
conditions
}
#' get rows to hide (in _all but not in _sel)
#' @param wb a wbWorkbook
#' @param tab_len an index for a table object in the workbook
#' @param filter the filter as character string
rows_to_hide <- function(wb, tab_len, filter) {
x <- wb_to_df(wb, sheet = wb$tables$tab_sheet[tab_len], dims = wb$tables$tab_ref[tab_len])
rows_all <- rownames(x)
x <- x[eval(parse(text = filter)), ]
rows_sel <- rownames(x)
out <- rows_all[!rows_all %in% rows_sel]
as.integer(out)
}
escape_varname <- function(var_name) {
vapply(var_name, function(x) {
ifelse(grepl("\\s", x), sprintf("`%s`", x), x)
}, NA_character_)
}
#' function to apply data table filter
#' @param choose a base R statement to filter an object "x$cyl == 4"
#' @inheritParams openxlsx2::wb_add_data_table
wb_filter_data_table <- function(wb, sheet = current_sheet(), x, dims = "A1", choose = NULL, ...) {
openxlsx2:::assert_workbook(wb)
## add the data table object
wb$add_data_table(wb, sheet = sheet, x = x, dims = dims, ...)
if (!is.null(choose)) {
nms <- names(choose)
nms <- paste0("x$", escape_varname(nms))
filter <- vapply(seq_along(nms), function(i) {
gsub("x", replacement = nms[i], x = choose[i])},
NA_character_)
print(filter)
openxlsx2:::assert_class(filter, "character")
## assume that the table created is the last table added to the workbook
tab_len <- length(wb$tables$tab_xml)
### prepare condition list & autofilter xml
fltr <- create_conditions(filter)
new_autofilter <- prepare(tabs = wb$tables, tab_name = wb$tables$tab_name[tab_len], conditions = fltr)
sel <- rows_to_hide(wb, tab_len, filter)
# hide rows
wb$set_row_heights(rows = sel, hidden = TRUE)
# add auto filter to table
wb$tables$tab_xml[tab_len] <- gsub(
pattern = xml_node(wb$tables$tab_xml[tab_len], "table", "autoFilter"),
replacement = new_autofilter,
x = wb$tables$tab_xml[tab_len]
)
}
invisible(wb)
}
library(openxlsx2)
### example 1
wb <- wb_workbook() %>%
wb_add_worksheet() %>%
wb_filter_data_table(x = mtcars, choose = c(cyl = "x %in% c(4, 8)", am = c("x != 1")))
if (interactive()) wb$open()
### example 2
df <- esoph
names(df) <- c("Age group", "Alcohol consumption", "Tobacco consumption", "Number of cases", "Number of controls")
wb <- wb_workbook()$add_worksheet()
wb <- wb_filter_data_table(wb, x = df, choose = c(`Age group` = "x >'25-34'"))
if (interactive()) wb$open()
### example 3
df <- ggplot2::economics
wb <- wb_workbook()$add_worksheet()
wb <- wb_filter_data_table(wb, x = df, choose = c(date = "x > as.Date('2010-01-01')"))
if (interactive()) wb$open()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment