Created
July 18, 2018 07:26
-
-
Save phabee/a77e5b82f0ae1529d27874cc2ff62fc6 to your computer and use it in GitHub Desktop.
Memory Allocation performance Test in R using statically initialized data.frames vs. dynamically growing data.frames
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Generate random Tour | |
#' | |
#' Generates a random Tour with nrows number of stops with x/y coordinates and | |
#' a 4-digit random ZIP-code. | |
#' | |
#' @param nrows numeric, the number of stops | |
#' | |
#' @return data.frame, the tour | |
#' @export | |
generate_random_tour_dynamic <- function(nrows) { | |
x <- c(runif(nrows, min = 0, max = 100)) | |
y <- c(runif(nrows, min = 0, max = 100)) | |
loc <- c(floor(runif(nrows, 1000, 9999))) | |
return(data.frame(x = x, y = y, loc = loc, stringsAsFactors = FALSE)) | |
} | |
#' Generate random Tour with max size | |
#' | |
#' Generates a random Tour with initialized maximal size (which should not be | |
#' exceeded) and stores current index position as well as maximal size in object | |
#' attributes assigned to the object, namely "_cur_idx" and "_max_idx". This | |
#' can lateron be used / accessed / updated by rbind-alternative implementation | |
#' to prevent the need of using the terribly bad performing rbind / rbindlist | |
#' operations. | |
#' | |
#' @param nrows | |
#' @param maxsize | |
#' | |
#' @return data.frame, the tour | |
#' @export | |
generate_random_tour_static <- function(nrows, maxsize) { | |
assertthat::assert_that(nrows <= maxsize) | |
x <- c(runif(nrows, min = 0, max = 100), rep(0, maxsize - nrows)) | |
y <- c(runif(nrows, min = 0, max = 100), rep(0, maxsize - nrows)) | |
loc <- c(floor(runif(nrows, 1000, 9999)), rep(NA, maxsize - nrows)) | |
df <- data.frame(x = x, y = y, loc = loc, stringsAsFactors = FALSE) | |
attr(df, "_cur_idx") <- nrows | |
attr(df, "_max_idx") <- maxsize | |
return(df) | |
} | |
#' Appends a given stop to a given tour | |
#' | |
#' @param tour data.frame, a given tour | |
#' @param stop data.frame, a given tour-stop | |
#' | |
#' @return data.frame, the tour with the stop appended at the end | |
#' @export | |
append_stop_dynamic <- function(tour, stop) { | |
return (rbind(tour, stop)) | |
} | |
#' Appends a given stop to a given statically sized tour-object | |
#' | |
#' This is the workaround to prevent rbind/rbindlist. Instead of using rbind | |
#' or rbindlist, which copy the datastructure with extended size, gc the old one | |
#' and return the newly growed object, we access the (expectedly set) object- | |
#' attribute "_cur_idx" and determine the position where the stop has to be | |
#' appended. Then we update the "_cur_idx" attribute and return the resulting | |
#' tour. | |
#' | |
#' @param tour data.frame, a given tour | |
#' @param stop data.frame, a given tour-stop | |
#' | |
#' @return data.frame, the tour with the stop appended at the end | |
#' @export | |
append_stop_static <- function(tour, stop) { | |
cur_pos <- attr(a, "_cur_idx") + 1 | |
tour[cur_pos,] <- stop | |
attr(tour, "_cur_idx") <- cur_pos | |
return (tour) | |
} | |
run_test_dynamic_allocation <- function(nloop, initsize) { | |
append_size <- 150 | |
for (j in 1:nloop) { | |
tour <- generate_random_tour_dynamic(initsize) | |
for (i in 1:append_size) { | |
stop <- generate_random_tour_dynamic(1) | |
tour <- append_stop_dynamic(tour, stop) | |
} | |
} | |
} | |
run_test_static_allocation <- function(nloop, initsize) { | |
append_size <- 150 | |
max_size <- initsize + append_size | |
for (j in 1:nloop) { | |
tour <- generate_random_tour_static(initsize, max_size) | |
for (i in 1:append_size) { | |
stop <- generate_random_tour_dynamic(1) | |
tour <- append_stop_static(tour, stop) | |
} | |
} | |
} | |
run_test_static_vs_dynamic <- function(nloop, initsize) { | |
a <- system.time(run_test_dynamic_allocation(nloop, initsize)) | |
cat("dynamic: ", a[1], "\n") | |
a <- system.time(run_test_static_allocation(nloop, initsize)) | |
cat("static: ", a[1]) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In this R-script we test the performance gain we may produce when using fix-sized datastructures (data.frames here) instead of dynamically growing ones. To make this possible, we make use of attributes to contain metadata about the maximal size as well as the current index. For performance-reasons we don't do for out-of-bound checks in the append_stop_static method here. Unfortunately, the speedup achieved by this workaround is not worth mentioning... :(