Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Cluster and multiprocess with future and analogsea
library(analogsea)
library(tidyverse)
library(furrr)
# Cluster configuration ----
# Tag attached to every droplet created below; the same tag is used later to
# look the droplets up again.
cluster_tag <- "r_cluster"
# Droplets are named <prefix><index>, e.g. "node-1", "node-2".
cluster_prefix <- "node-"
number_of_nodes <- 2
# Guard the count: paste0() recycles a zero-length index to "", so a count of
# 0 would still request one droplet (named just "node-").
stopifnot(is.numeric(number_of_nodes), number_of_nodes >= 1)

# Path to the private SSH key that matches the key uploaded to DigitalOcean.
# A literal "~" was observed not to work when this path is handed to ssh, so
# the home directory is expanded in R to give ssh an absolute path.
ssh_private_key_file <- path.expand("~/.ssh/id_rsa")

# Create one droplet per node via analogsea's docklets_create() (per the
# analogsea docs these are droplets with Docker available, which the workers
# need in order to run the rocker image).
# seq_len() is used instead of 1:n so the index never counts down to 0.
my_droplets <- docklets_create(
  names = paste0(cluster_prefix, seq_len(number_of_nodes)),
  region = "sfo2",
  size = "16gb",
  tags = cluster_tag
)
# Pull one IPv4 address per tagged droplet.
# A droplet's v4 list can hold more than one interface (the DigitalOcean API
# marks each entry with type "public" or "private"); only the public address
# is reachable over SSH from outside, so prefer it and fall back to the first
# entry when no interface is marked public.
ips <- droplets(tag = cluster_tag) %>%
  map_chr(function(droplet) {
    v4 <- droplet$networks$v4
    types <- map_chr(v4, "type", .default = NA_character_)
    v4[[match("public", types, nomatch = 1L)]]$ip_address
  })
# Open a PSOCK cluster with one worker per droplet IP, reached over SSH.
cl <- makeClusterPSOCK(
  workers = ips,
  # DigitalOcean droplets are accessed as root
  user = "root",
  # ssh options: authenticate only with the key registered at DigitalOcean
  rshopts = c(
    "-o", "StrictHostKeyChecking=no",
    "-o", "IdentitiesOnly=yes",
    "-i", ssh_private_key_file
  ),
  # On each node, start the worker's Rscript inside the rocker/tidyverse
  # Docker image, sharing the host network
  rscript = c(
    "sudo", "docker", "run", "--net=host",
    "rocker/tidyverse", "Rscript"
  ),
  # Install furrr (which pulls in future) on the worker at startup
  rscript_args = c("-e", shQuote("install.packages('furrr')")),
  verbose = TRUE
)
# Monte Carlo estimate of pi.
#
# Draws `n_iterations` points uniformly from the square [-1, 1] x [-1, 1],
# counts how many lie within distance 1 of the origin, and scales that
# fraction by 4 (the ratio of the square's area to the unit circle's is 4/pi).
#
# n_iterations: number of random points to draw.
# Returns a single numeric estimate of pi (NaN when n_iterations is 0).
simulate_pi <- function(n_iterations) {
  # One runif() call fills both coordinate columns, column by column.
  points <- matrix(runif(2 * n_iterations, min = -1, max = 1), ncol = 2)
  distance <- sqrt(points[, 1]^2 + points[, 2]^2)
  hits <- sum(distance <= 1)
  4 * (hits / n_iterations)
}
library(tictoc)
# Benchmark workload: 400 independent simulations of 2.5 million draws each.
draw_list <- as.list(rep(2500000, 400))

# Run 1: a single-level plan that distributes the simulations across the
# cluster workers.
plan(cluster, workers = cl)
tic("simulate pi - parallel - no multiprocess on nodes")
sims_list <- future_map(draw_list, simulate_pi)
toc()

# Run 2: a nested plan -- cluster workers on the outside, a second parallel
# level on each node. `multiprocess` is deprecated (and defunct in current
# releases of future); `multisession` is the portable replacement.
# NOTE(review): simulate_pi() creates no futures of its own, so the second
# plan level looks unused by this call -- confirm; exploiting it would need a
# nested future_map() inside the mapped function.
plan(list(tweak(cluster, workers = cl), multisession))
tic("simulate pi - parallel - with multiprocess on nodes")
sims_list <- future_map(draw_list, simulate_pi)
toc()
# NOTE(review): nothing in the visible script stops the cluster or deletes the
# droplets; they keep running until removed manually.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment