# klmr/group-table-cols.r

## group-table-cols.r
#' Apply a function over groups of columns.
#'
#' The columns of `data` are partitioned, left to right, into consecutive
#' groups of `width` columns (the last group may be narrower). `fun` is called
#' once per group with the corresponding sub-table, and the per-group results
#' are combined column-wise.
#'
#' @param data a table supporting `ncol()` and `data[, j, drop = FALSE]`.
#' @param width the number of columns per group.
#' @param fun the function to call on each column group.
#' @param ... further arguments passed on to `fun`.
#' @return The per-group results, combined by `dplyr::bind_cols()`.
grouped_columns_apply = function (data, width, fun, ...) {
    # Use a distinct local name so that the `group_indices` function is not
    # shadowed by its own result.
    column_groups = group_indices(ncol(data), width)
    # `drop = FALSE` ensures `fun` always receives a table, even for a group
    # that consists of a single column.
    results = lapply(
        column_groups,
        function (indices) fun(data[, indices, drop = FALSE], ...)
    )
    # Qualified call: this file never attaches dplyr, so a bare `bind_cols`
    # would not be found in a fresh session.
    dplyr::bind_cols(results)
}

# If the number of columns is always evenly divisible by the width, the following
# function works:
# group_indices = function (length, width) {
#     # Generate a vector of indices that specify group membership: for example,
#     #     1, 1, 1, 2, 2, 2
#     # … for length = 6 and width = 3.
#     index_groups = rep(seq_len(length / width), each = width)
#     # Split the indices according to the group membership.
#     split(seq_len(length), index_groups)
# }

# In general it’s unfortunately a bit more complicated:
#' Partition the indices `1, …, length` into consecutive groups.
#'
#' @param length the number of indices to partition (non-negative).
#' @param width the number of indices per group (at least 1).
#' @return A list of integer vectors named `"1"`, `"2"`, …. Every group holds
#'   `width` indices, except the last one, which holds the remainder when
#'   `length` is not a multiple of `width`.
group_indices = function (length, width) {
    stopifnot(length >= 0, width >= 1)
    # Number of groups required to hold all indices.
    n_groups = ceiling(length / width)
    # Group membership per index, e.g. 1, 1, 1, 2, 2, 2, 3 for length = 7 and
    # width = 3. `length.out` truncates the vector to exactly `length` entries;
    # a longer grouping vector would make `split` warn that the data length is
    # not a multiple of the split variable.
    index_groups = rep(seq_len(n_groups), each = width, length.out = length)
    split(seq_len(length), index_groups)
}

# Usage example: compute the row means of each group of three adjacent columns
# of a table with six random columns.

data = data.frame(
    a = rnorm(16),
    b = rnorm(16),
    c = rnorm(16),
    d = rnorm(16),
    e = rnorm(16),
    f = rnorm(16)
)
grouped_columns_apply(data, width = 3, fun = rowMeans)
	#' Apply a function over groups of columns.
	#'
	#' The columns of `data` are partitioned, left to right, into consecutive
	#' groups of `width` columns (the last group may be narrower). `fun` is
	#' called once per group with the corresponding sub-table, and the per-group
	#' results are combined column-wise.
	#'
	#' @param data a table supporting `ncol()` and `data[, j, drop = FALSE]`.
	#' @param width the number of columns per group.
	#' @param fun the function to call on each column group.
	#' @param ... further arguments passed on to `fun`.
	#' @return The per-group results, combined by `dplyr::bind_cols()`.
	grouped_columns_apply = function (data, width, fun, ...) {
	    # Use a distinct local name so that the `group_indices` function is not
	    # shadowed by its own result.
	    column_groups = group_indices(ncol(data), width)
	    # `drop = FALSE` ensures `fun` always receives a table, even for a group
	    # that consists of a single column.
	    results = lapply(
	        column_groups,
	        function (indices) fun(data[, indices, drop = FALSE], ...)
	    )
	    # Qualified call: this file never attaches dplyr, so a bare `bind_cols`
	    # would not be found in a fresh session.
	    dplyr::bind_cols(results)
	}

	# If the number of columns is always evenly divisible by the width, the following
	# function works:
	# group_indices = function (length, width) {
	# # Generate a vector of indices that specify group membership: for example,
	# # 1, 1, 1, 2, 2, 2
	# # … for length = 6 and width = 3.
	# index_groups = rep(seq_len(length / width), each = width)
	# # Split the indices according to the group membership.
	# split(seq_len(length), index_groups)
	# }

	# In general it’s unfortunately a bit more complicated:
	#' Partition the indices `1, …, length` into consecutive groups.
	#'
	#' @param length the number of indices to partition (non-negative).
	#' @param width the number of indices per group (at least 1).
	#' @return A list of integer vectors named `"1"`, `"2"`, …. Every group holds
	#'   `width` indices, except the last one, which holds the remainder when
	#'   `length` is not a multiple of `width`.
	group_indices = function (length, width) {
	    stopifnot(length >= 0, width >= 1)
	    # Number of groups required to hold all indices.
	    n_groups = ceiling(length / width)
	    # Group membership per index, e.g. 1, 1, 1, 2, 2, 2, 3 for length = 7
	    # and width = 3. `length.out` truncates the vector to exactly `length`
	    # entries; a longer grouping vector would make `split` warn that the
	    # data length is not a multiple of the split variable.
	    index_groups = rep(seq_len(n_groups), each = width, length.out = length)
	    split(seq_len(length), index_groups)
	}

	# Usage example: compute the row means of each group of three adjacent
	# columns of a table with six random columns.

	data = data.frame(
	    a = rnorm(16),
	    b = rnorm(16),
	    c = rnorm(16),
	    d = rnorm(16),
	    e = rnorm(16),
	    f = rnorm(16)
	)
	grouped_columns_apply(data, width = 3, fun = rowMeans)