require(MASS)
## Loading required package: MASS
# Preview the first six rows of the Boston data set.
head(Boston, n = 6L)
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.090 1 296 15.3 396.9 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.967 2 242 17.8 396.9 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.967 2 242 17.8 392.8 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.062 3 222 18.7 394.6 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.062 3 222 18.7 396.9 5.33
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.062 3 222 18.7 394.1 5.21
## medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
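Set the number of workers (`clusters`), the total number of random starting sets to try (`sets`), and the number of starting sets handled by each worker (`setsPerCluster`).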
# `clusters` is the number of workers/chunks the random starts are split into
# (it is not the number of k-means clusters).
clusters <- 24
# Every worker/chunk handles the same number of random starts ...
setsPerCluster <- 1E4
# ... so the sequential baseline has to try this many starts in total.
sets <- setsPerCluster * clusters
Use a non-parallelized call to kmeans().
# Sequential baseline: a single kmeans() call tries all `sets` random starts.
system.time({
  result1 <- kmeans(Boston, centers = 4, nstart = sets)
})
## user system elapsed
## 180.60 1.33 182.35
Use lapply().
# Run the k-means restarts sequentially in `clusters` chunks and return the
# best fit.
#
# Reads the globals `clusters` (number of chunks), `setsPerCluster` (random
# starts per chunk) and `Boston` (the data). Each chunk is one kmeans() call
# with 4 centers; the returned value is the kmeans fit with the smallest
# total within-cluster sum of squares (`tot.withinss`).
useLapply <- function () {
  starts_per_chunk <- rep(setsPerCluster, clusters)
  results <- lapply(
    starts_per_chunk,
    function(nstart) kmeans(Boston, 4, nstart = nstart)
  )
  # vapply() pins the result to one number per fit; sapply() would silently
  # change its return type on empty or malformed input.
  tot_withinss <- vapply(
    results,
    function(result) result$tot.withinss,
    numeric(1)
  )
  results[[which.min(tot_withinss)]]
}
# Time the chunked but still sequential lapply() variant.
system.time({
  result2 <- useLapply()
})
## user system elapsed
## 178.57 1.67 180.73
Use the snow package.
# Run the k-means restarts on a snow socket cluster and return the best fit.
#
# Starts `clusters` socket workers, attaches MASS on each of them so that
# `Boston` can be found there, runs one kmeans() call (4 centers,
# `setsPerCluster` random starts) per worker, and returns the kmeans fit with
# the smallest total within-cluster sum of squares. Reads the globals
# `clusters` and `setsPerCluster`.
useSnow <- function () {
  # library() stops with an error when snow is not installed; require() would
  # only return FALSE and let a later call fail less clearly.
  library(snow)
  # The snow:: prefix keeps these calls bound to snow even when the parallel
  # package, which masks the same function names, is attached as well.
  cl <- snow::makeCluster(clusters, type = "SOCK")
  # Shut the workers down even if one of the steps below raises an error.
  on.exit(snow::stopCluster(cl), add = TRUE)
  # Return NULL from each worker so nothing useless is shipped back.
  snow::clusterEvalQ(cl, {library(MASS); NULL})
  results <- snow::clusterApply(
    cl,
    rep(setsPerCluster, clusters),
    function(nstart) kmeans(Boston, 4, nstart = nstart)
  )
  # vapply() guarantees exactly one numeric score per fit.
  tot_withinss <- vapply(
    results,
    function(result) result$tot.withinss,
    numeric(1)
  )
  results[[which.min(tot_withinss)]]
}
# Time the snow-based parallel variant.
system.time({
  result3 <- useSnow()
})
## Loading required package: snow
## Warning: package 'snow' was built under R version 3.0.3
## user system elapsed
## 0.17 0.05 58.39
Use the parallel package.
# Run the k-means restarts on a `parallel` cluster and return the best fit.
#
# Starts `clusters` workers, gives them independent random-number streams,
# attaches MASS on each of them so that `Boston` can be found there, runs one
# kmeans() call (4 centers, `setsPerCluster` random starts) per worker, and
# returns the kmeans fit with the smallest total within-cluster sum of
# squares. Reads the globals `clusters` and `setsPerCluster`.
useParallel <- function () {
  # library() stops with an error if the package cannot be loaded; require()
  # would only return FALSE.
  library(parallel)
  cl <- parallel::makeCluster(clusters)
  # Shut the workers down even if one of the steps below raises an error.
  on.exit(parallel::stopCluster(cl), add = TRUE)
  # Separate RNG streams keep the workers from repeating each other's
  # random starts.
  parallel::clusterSetRNGStream(cl)
  # Return NULL from each worker so the search path is not shipped back.
  parallel::clusterEvalQ(cl, {library(MASS); NULL})
  results <- parallel::clusterApply(
    cl,
    rep(setsPerCluster, clusters),
    function(nstart) kmeans(Boston, 4, nstart = nstart)
  )
  # vapply() guarantees exactly one numeric score per fit.
  tot_withinss <- vapply(
    results,
    function(result) result$tot.withinss,
    numeric(1)
  )
  results[[which.min(tot_withinss)]]
}
# Time the variant built on the parallel package.
system.time({
  result4 <- useParallel()
})
## Loading required package: parallel
##
## Attaching package: 'parallel'
##
## The following objects are masked from 'package:snow':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, clusterSplit, makeCluster,
## parApply, parCapply, parLapply, parRapply, parSapply,
## splitIndices, stopCluster
## user system elapsed
## 0.19 0.03 51.81
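Show results. All four runs find the same four cluster centers; only the order in which the clusters are numbered differs between runs.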
# Cluster centers found by the sequential baseline.
result1[["centers"]]
## crim zn indus chas nox rm age dis rad tax
## 1 0.7413 9.949 12.984 0.06122 0.5822 6.190 73.29 3.332 4.827 406.1
## 2 15.2190 0.000 17.927 0.02632 0.6737 6.066 89.91 1.994 22.500 644.7
## 3 0.2410 17.817 6.669 0.07463 0.4834 6.465 55.71 4.874 4.313 276.5
## 4 10.9105 0.000 18.573 0.07843 0.6712 5.982 89.91 2.077 23.020 668.2
## ptratio black lstat medv
## 1 17.67 371.66 12.715 22.38
## 2 19.93 57.79 20.449 13.13
## 3 17.87 387.81 9.538 25.87
## 4 20.20 371.80 17.874 17.43
# Cluster centers found by the lapply() variant.
result2[["centers"]]
## crim zn indus chas nox rm age dis rad tax
## 1 0.7413 9.949 12.984 0.06122 0.5822 6.190 73.29 3.332 4.827 406.1
## 2 0.2410 17.817 6.669 0.07463 0.4834 6.465 55.71 4.874 4.313 276.5
## 3 10.9105 0.000 18.573 0.07843 0.6712 5.982 89.91 2.077 23.020 668.2
## 4 15.2190 0.000 17.927 0.02632 0.6737 6.066 89.91 1.994 22.500 644.7
## ptratio black lstat medv
## 1 17.67 371.66 12.715 22.38
## 2 17.87 387.81 9.538 25.87
## 3 20.20 371.80 17.874 17.43
## 4 19.93 57.79 20.449 13.13
# Cluster centers found by the snow variant.
result3[["centers"]]
## crim zn indus chas nox rm age dis rad tax
## 1 15.2190 0.000 17.927 0.02632 0.6737 6.066 89.91 1.994 22.500 644.7
## 2 0.7413 9.949 12.984 0.06122 0.5822 6.190 73.29 3.332 4.827 406.1
## 3 10.9105 0.000 18.573 0.07843 0.6712 5.982 89.91 2.077 23.020 668.2
## 4 0.2410 17.817 6.669 0.07463 0.4834 6.465 55.71 4.874 4.313 276.5
## ptratio black lstat medv
## 1 19.93 57.79 20.449 13.13
## 2 17.67 371.66 12.715 22.38
## 3 20.20 371.80 17.874 17.43
## 4 17.87 387.81 9.538 25.87
# Cluster centers found by the parallel-package variant.
result4[["centers"]]
## crim zn indus chas nox rm age dis rad tax
## 1 15.2190 0.000 17.927 0.02632 0.6737 6.066 89.91 1.994 22.500 644.7
## 2 0.7413 9.949 12.984 0.06122 0.5822 6.190 73.29 3.332 4.827 406.1
## 3 10.9105 0.000 18.573 0.07843 0.6712 5.982 89.91 2.077 23.020 668.2
## 4 0.2410 17.817 6.669 0.07463 0.4834 6.465 55.71 4.874 4.313 276.5
## ptratio black lstat medv
## 1 19.93 57.79 20.449 13.13
## 2 17.67 371.66 12.715 22.38
## 3 20.20 371.80 17.874 17.43
## 4 17.87 387.81 9.538 25.87