Skip to content

Instantly share code, notes, and snippets.

@DavisVaughan
Created May 6, 2020 11:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DavisVaughan/661da5a3c9c953d81b7705591f02aff3 to your computer and use it in GitHub Desktop.
Save DavisVaughan/661da5a3c9c953d81b7705591f02aff3 to your computer and use it in GitHub Desktop.
library(future)
# //////////////////////////////////////////////////////////////////////////////
# Baseline for the main session, measured before any futures are created.
# Fresh R session takes around 40MB (the run recorded below reported 35.7 MB)
pryr::mem_used()
#> Registered S3 method overwritten by 'pryr':
#> method from
#> print.bytes Rcpp
#> 35.7 MB
# Quoted code meant to be evaluated by each future. It builds a large data
# frame, stores it in the global environment of whichever R process evaluates
# it, and returns that process's memory usage.
expr <- rlang::expr({
  # Ten vectors of one million doubles each: an ~80 MB data frame
  payload <- replicate(10, list(1:1e6 + 0), simplify = FALSE)
  names(payload) <- paste0("a", seq_len(10))
  payload <- as.data.frame(payload)

  # Bind it in the global env under a freshly generated 20-character random
  # name, so repeated evaluations add a new object instead of overwriting
  # the previous one
  assign(
    stringi::stri_rand_strings(1, 20),
    value = payload,
    envir = .GlobalEnv
  )

  # The value of the expression: memory in use at this point.
  # We expect around 120MB
  pryr::mem_used()
})
# //////////////////////////////////////////////////////////////////////////////
# Single PSOCK worker, used through the cluster plan with default settings
cl <- makeClusterPSOCK(1)
plan(cluster, workers = cl)

# Launch 20 futures on the same worker
fs <- vector("list", 20)
for (i in seq_along(fs)) {
  # `expr` is already a quoted expression, so it must not be substituted again
  fs[[i]] <- future(expr, substitute = FALSE)
}

# What are the memory usage values?
# Collected one future at a time with value(): the list helper values() was
# deprecated in later releases of the future package, while value() on a
# single future works across versions.
result <- lapply(fs, value)
#> Registered S3 method overwritten by 'pryr':
#> method from
#> print.bytes Rcpp

# Shut the worker down before inspecting the collected results
parallel::stopCluster(cl)

result
#> [[1]]
#> 112 MB
#>
#> [[2]]
#> 112 MB
#>
#> [[3]]
#> 112 MB
#>
#> [[4]]
#> 112 MB
#>
#> [[5]]
#> 112 MB
#>
#> [[6]]
#> 112 MB
#>
#> [[7]]
#> 112 MB
#>
#> [[8]]
#> 112 MB
#>
#> [[9]]
#> 112 MB
#>
#> [[10]]
#> 112 MB
#>
#> [[11]]
#> 112 MB
#>
#> [[12]]
#> 112 MB
#>
#> [[13]]
#> 112 MB
#>
#> [[14]]
#> 112 MB
#>
#> [[15]]
#> 112 MB
#>
#> [[16]]
#> 112 MB
#>
#> [[17]]
#> 112 MB
#>
#> [[18]]
#> 112 MB
#>
#> [[19]]
#> 112 MB
#>
#> [[20]]
#> 112 MB
# //////////////////////////////////////////////////////////////////////////////
# Run it again but this time with a persistent worker that doesn't clean
# up between calls
# Single PSOCK worker again, this time with `persistent = TRUE` on the plan.
# NOTE(review): support for the `persistent` argument may differ in newer
# releases of the future package; confirm against the installed version.
cl <- makeClusterPSOCK(1)
plan(cluster, workers = cl, persistent = TRUE)

# Launch 20 futures on the same worker
fs <- vector("list", 20)
for (i in seq_along(fs)) {
  # `expr` is already a quoted expression, so it must not be substituted again
  fs[[i]] <- future(expr, substitute = FALSE)
}

# What are the memory usage values?
# Collected one future at a time with value(): the list helper values() was
# deprecated in later releases of the future package, while value() on a
# single future works across versions.
result <- lapply(fs, value)
#> Registered S3 method overwritten by 'pryr':
#> method from
#> print.bytes Rcpp

# Shut the worker down before inspecting the collected results
parallel::stopCluster(cl)

result
#> [[1]]
#> 112 MB
#>
#> [[2]]
#> 192 MB
#>
#> [[3]]
#> 272 MB
#>
#> [[4]]
#> 352 MB
#>
#> [[5]]
#> 432 MB
#>
#> [[6]]
#> 512 MB
#>
#> [[7]]
#> 592 MB
#>
#> [[8]]
#> 672 MB
#>
#> [[9]]
#> 752 MB
#>
#> [[10]]
#> 832 MB
#>
#> [[11]]
#> 912 MB
#>
#> [[12]]
#> 992 MB
#>
#> [[13]]
#> 1.07 GB
#>
#> [[14]]
#> 1.15 GB
#>
#> [[15]]
#> 1.23 GB
#>
#> [[16]]
#> 1.31 GB
#>
#> [[17]]
#> 1.39 GB
#>
#> [[18]]
#> 1.47 GB
#>
#> [[19]]
#> 1.55 GB
#>
#> [[20]]
#> 1.63 GB
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment