Last active
May 8, 2018 11:14
-
-
Save samuel-bohman/a614804ec8214a3ee5cc25aa946d4483 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare configurations | |
h_configs <- dtwclust::compare_clusterings_configs( | |
types = "hierarchical", | |
k = 2L:30L, | |
controls = list( | |
hierarchical = hierarchical_control( | |
method = "all" | |
# distmat = d # Optional precomputed cross-distance matrix | |
) | |
), | |
preprocs = pdc_configs( | |
type = "preproc", | |
none = list() | |
), | |
distances = pdc_configs( | |
type = "distance", | |
L1 = list() | |
# L2 = list() | |
), | |
centroids = pdc_configs( | |
type = "centroid", | |
hierarchical = list(default = list()) | |
) | |
) | |
# Define score function | |
score_internal <- function(obj_list, ...) { | |
scores <- lapply(obj_list, function(obj) { | |
indices <- cvi(a = obj, type = c("Sil", "CH", "SF")) | |
}) | |
# return | |
do.call(rbind, scores) | |
} | |
# Setup parallel computing backend | |
workers <- parallel::makeCluster(parallel::detectCores()) # Create multi-process workers | |
# Load dtwclust in each worker, and make each one use 2 threads | |
invisible(parallel::clusterEvalQ(workers, { | |
library(dtwclust) | |
RcppParallel::setThreadOptions(2L) # All available threads are used by default | |
})) | |
# Register found workers | |
doParallel::registerDoParallel(workers) | |
# Main function | |
cc <- dtwclust::compare_clusterings( | |
series = v, | |
types = "hierarchical", | |
configs = h_configs, | |
# seed = 123, | |
trace = FALSE, | |
score.clus = score_internal, | |
shuffle.configs = TRUE, | |
return.objects = FALSE # To save RAM, set this to FALSE; see repeat_clustering() | |
) | |
# Calculate rank based on Borda method | |
cl_rank1 <- cc$results$hierarchical %>% | |
dplyr::select(config_id, k, method, distance, centroid, Sil, CH, SF) %>% | |
mutate_at(vars(Sil:SF), funs(dense_rank(desc(.)))) %>% | |
rowwise() %>% | |
mutate(sum = sum(c(Sil, CH, SF))) %>% | |
arrange(sum) | |
cl_rank1 | |
# Calculate rank based on mean (for plotting) | |
clz <- cc$results$hierarchical | |
clz$Sil <- dtwclust::zscore(clz$Sil) # Z-normalize | |
clz$CH <- zscore(clz$CH) | |
clz$SF <- zscore(clz$SF) | |
cl_rank2 <- clz %>% | |
dplyr::select(config_id, k, method, distance, centroid, Sil, CH, SF) %>% | |
rowwise() %>% | |
mutate(mean = mean(c(Sil, CH, SF))) %>% | |
# mutate(mean = weighted.mean(c(Sil, CH, SF, w = c(.25, .25, .25))) %>% | |
arrange(desc(mean)) | |
cl_rank2 | |
# Plot the results | |
cl_rank2 %>% | |
tidyr::gather(key = cvi, value = cvi_value, Sil, CH, SF) %>% | |
ggplot2::ggplot() + | |
geom_line(aes(x = k, y = cvi_value, color = cvi)) + | |
# geom_line(aes(x = k, y = mean), linetype = "solid") + | |
geom_point(aes(x = k, y = cvi_value, color = cvi), size = 1) + | |
scale_x_continuous(breaks = scales::pretty_breaks()) + | |
theme(panel.grid.minor.x = element_blank()) + | |
labs(x = "cluster size k", y = "z-value") + | |
guides(col = guide_legend(reverse = TRUE)) + | |
facet_wrap(~ method) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment