Skip to content

Instantly share code, notes, and snippets.

@benjamin-chan
Last active August 29, 2015 14:05
Show Gist options
  • Save benjamin-chan/0cace9083f65657d4112 to your computer and use it in GitHub Desktop.
Save benjamin-chan/0cace9083f65657d4112 to your computer and use it in GitHub Desktop.
Test parallelization

Test parallel

# Attach MASS for the Boston data set. library() stops with an error when the
# package is missing, whereas require() only warns and returns FALSE, which
# would defer the failure to the first use of `Boston`.
library(MASS)
## Loading required package: MASS
head(Boston)
##      crim zn indus chas   nox    rm  age   dis rad tax ptratio black lstat
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.090   1 296    15.3 396.9  4.98
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.967   2 242    17.8 396.9  9.14
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.967   2 242    17.8 392.8  4.03
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.062   3 222    18.7 394.6  2.94
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.062   3 222    18.7 396.9  5.33
## 6 0.02985  0  2.18    0 0.458 6.430 58.7 6.062   3 222    18.7 394.1  5.21
##   medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7

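Set the number of worker processes (clusters) and the total number of random starting sets for kmeans() (sets), split evenly across the workers.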

# Number of chunks / worker processes the random starts are split across.
# (This is not the number of k-means clusters, which is fixed at 4 below.)
clusters <- 24
# Random starts handled by each chunk.
setsPerCluster <- 1E4
# Total number of random starts over all chunks.
sets <- setsPerCluster * clusters

Use a non-parallelized call to kmeans().

# Serial baseline: a single kmeans() call works through all `sets` random
# starts itself and keeps its best fit in result1.
system.time({
  result1 <- kmeans(Boston, centers = 4, nstart = sets)
})
##    user  system elapsed 
##  180.60    1.33  182.35

Use lapply().

# Serial counterpart of the parallel runs: perform `clusters` independent
# kmeans() fits one after another, each with `setsPerCluster` random starts,
# and keep the best one.
#
# Reads `Boston`, `clusters` and `setsPerCluster` from the enclosing
# environment.
#
# Returns the kmeans fit with the smallest total within-cluster sum of squares
# (`tot.withinss`).
useLapply <- function () {
  starts <- rep(setsPerCluster, clusters)
  fits <- lapply(starts, function(nstart) kmeans(Boston, 4, nstart = nstart))
  # vapply() enforces exactly one numeric value per fit; sapply() would
  # silently change its return type on empty or malformed input.
  withinss <- vapply(fits, function(fit) fit$tot.withinss, numeric(1))
  fits[[which.min(withinss)]]
}
# Time the chunked serial run; the fit returned by useLapply() is kept in
# result2.
system.time(result2 <- useLapply())
##    user  system elapsed 
##  178.57    1.67  180.73

Use the snow package.

# Parallel run using the snow package: start `clusters` socket workers, have
# each one run kmeans() on Boston with `setsPerCluster` random starts, and
# keep the best fit.
#
# Reads `clusters` and `setsPerCluster` from the enclosing environment; the
# workers obtain `Boston` by attaching MASS themselves.
#
# Returns the kmeans fit with the smallest total within-cluster sum of squares
# (`tot.withinss`).
useSnow <- function () {
  # library() fails immediately if snow is not installed; require() would
  # only warn and return FALSE.
  library(snow)
  # Calls are namespace-qualified so that the same-named functions of the
  # parallel package cannot mask them once parallel is attached.
  cl <- snow::makeCluster(clusters, type = "SOCK")
  # Shut the workers down on every exit path, including errors raised by the
  # calls below, so no worker processes are left running.
  on.exit(snow::stopCluster(cl), add = TRUE)
  # Attach MASS on each worker; NULL keeps the per-worker return value small.
  snow::clusterEvalQ(cl, {library(MASS); NULL})
  starts <- rep(setsPerCluster, clusters)
  fits <- snow::clusterApply(
    cl, starts, function(nstart) kmeans(Boston, 4, nstart = nstart)
  )
  # vapply() enforces exactly one numeric value per fit.
  withinss <- vapply(fits, function(fit) fit$tot.withinss, numeric(1))
  fits[[which.min(withinss)]]
}
# Time the snow-based run; the fit returned by useSnow() is kept in result3.
system.time(result3 <- useSnow())
## Loading required package: snow
## Warning: package 'snow' was built under R version 3.0.3
##    user  system elapsed 
##    0.17    0.05   58.39

Use the parallel package.

# Parallel run using the parallel package: start `clusters` workers, give
# them independent random-number streams, have each one run kmeans() on
# Boston with `setsPerCluster` random starts, and keep the best fit.
#
# Reads `clusters` and `setsPerCluster` from the enclosing environment; the
# workers obtain `Boston` by attaching MASS themselves.
#
# Returns the kmeans fit with the smallest total within-cluster sum of squares
# (`tot.withinss`).
useParallel <- function () {
  # library() fails immediately if the package cannot be loaded; require()
  # would only warn and return FALSE.
  library(parallel)
  # Calls are namespace-qualified so that same-named functions from an
  # attached snow package cannot be picked up instead.
  cl <- parallel::makeCluster(clusters)
  # Shut the workers down on every exit path, including errors raised by the
  # calls below, so no worker processes are left running.
  on.exit(parallel::stopCluster(cl), add = TRUE)
  parallel::clusterSetRNGStream(cl)
  parallel::clusterEvalQ(cl, library(MASS))
  starts <- rep(setsPerCluster, clusters)
  fits <- parallel::clusterApply(
    cl, starts, function(nstart) kmeans(Boston, 4, nstart = nstart)
  )
  # vapply() enforces exactly one numeric value per fit.
  withinss <- vapply(fits, function(fit) fit$tot.withinss, numeric(1))
  fits[[which.min(withinss)]]
}
# Time the parallel-package run; the fit returned by useParallel() is kept in
# result4.
system.time(result4 <- useParallel())
## Loading required package: parallel
## 
## Attaching package: 'parallel'
## 
## The following objects are masked from 'package:snow':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, clusterSplit, makeCluster,
##     parApply, parCapply, parLapply, parRapply, parSapply,
##     splitIndices, stopCluster
##    user  system elapsed 
##    0.19    0.03   51.81

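Show results. All four runs find the same four cluster centers; only the row order (the arbitrary cluster numbering) differs between them.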

result1$centers
##      crim     zn  indus    chas    nox    rm   age   dis    rad   tax
## 1  0.7413  9.949 12.984 0.06122 0.5822 6.190 73.29 3.332  4.827 406.1
## 2 15.2190  0.000 17.927 0.02632 0.6737 6.066 89.91 1.994 22.500 644.7
## 3  0.2410 17.817  6.669 0.07463 0.4834 6.465 55.71 4.874  4.313 276.5
## 4 10.9105  0.000 18.573 0.07843 0.6712 5.982 89.91 2.077 23.020 668.2
##   ptratio  black  lstat  medv
## 1   17.67 371.66 12.715 22.38
## 2   19.93  57.79 20.449 13.13
## 3   17.87 387.81  9.538 25.87
## 4   20.20 371.80 17.874 17.43
result2$centers
##      crim     zn  indus    chas    nox    rm   age   dis    rad   tax
## 1  0.7413  9.949 12.984 0.06122 0.5822 6.190 73.29 3.332  4.827 406.1
## 2  0.2410 17.817  6.669 0.07463 0.4834 6.465 55.71 4.874  4.313 276.5
## 3 10.9105  0.000 18.573 0.07843 0.6712 5.982 89.91 2.077 23.020 668.2
## 4 15.2190  0.000 17.927 0.02632 0.6737 6.066 89.91 1.994 22.500 644.7
##   ptratio  black  lstat  medv
## 1   17.67 371.66 12.715 22.38
## 2   17.87 387.81  9.538 25.87
## 3   20.20 371.80 17.874 17.43
## 4   19.93  57.79 20.449 13.13
result3$centers
##      crim     zn  indus    chas    nox    rm   age   dis    rad   tax
## 1 15.2190  0.000 17.927 0.02632 0.6737 6.066 89.91 1.994 22.500 644.7
## 2  0.7413  9.949 12.984 0.06122 0.5822 6.190 73.29 3.332  4.827 406.1
## 3 10.9105  0.000 18.573 0.07843 0.6712 5.982 89.91 2.077 23.020 668.2
## 4  0.2410 17.817  6.669 0.07463 0.4834 6.465 55.71 4.874  4.313 276.5
##   ptratio  black  lstat  medv
## 1   19.93  57.79 20.449 13.13
## 2   17.67 371.66 12.715 22.38
## 3   20.20 371.80 17.874 17.43
## 4   17.87 387.81  9.538 25.87
result4$centers
##      crim     zn  indus    chas    nox    rm   age   dis    rad   tax
## 1 15.2190  0.000 17.927 0.02632 0.6737 6.066 89.91 1.994 22.500 644.7
## 2  0.7413  9.949 12.984 0.06122 0.5822 6.190 73.29 3.332  4.827 406.1
## 3 10.9105  0.000 18.573 0.07843 0.6712 5.982 89.91 2.077 23.020 668.2
## 4  0.2410 17.817  6.669 0.07463 0.4834 6.465 55.71 4.874  4.313 276.5
##   ptratio  black  lstat  medv
## 1   19.93  57.79 20.449 13.13
## 2   17.67 371.66 12.715 22.38
## 3   20.20 371.80 17.874 17.43
## 4   17.87 387.81  9.538 25.87
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment