Last active
March 4, 2021 10:58
-
-
Save stevenworthington/5879265 to your computer and use it in GitHub Desktop.
Calculate K-medoids using the uncentered correlation distance method
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example of calculating K-medoids using the uncentered
# correlation metric as a measure of distance.
# (Trailing "| |" extraction artifacts removed — they were parse errors.)

# 0) load data
data(mtcars)

# 1) create a distance matrix using the "cosine of the angle" method
#    (aka, uncentered correlation)

# a) using the designdist() function in the vegan package;
#    terms = "quadratic" gives J = sum(x*y), A = sum(x^2), B = sum(y^2),
#    so "1-J/sqrt(A*B)" is one minus the cosine similarity
# install.packages("vegan", dep = TRUE)
library(vegan)
distMat1 <- designdist(mtcars, method = "1-J/sqrt(A*B)", terms = "quadratic",
                       name = "cosine.complement")

# b) using the distancematrix() function in the hopach package
#    (Bioconductor; the legacy biocLite installer is kept for reference)
# source("http://bioconductor.org/biocLite.R")
# biocLite("hopach")
library(hopach)
distMat2 <- as.matrix(distancematrix(mtcars, d = "cosangle", na.rm = TRUE))
# 2) implement partitioning clustering (PAM / K-medoids) using the
#    precomputed distance matrices; diss = TRUE tells pam() the input is
#    already a dissimilarity, keep.diss = TRUE retains it for plotting later
# install.packages("cluster", dep = TRUE)
library(cluster)
clusterObject1 <- pam(distMat1, k = 5, diss = TRUE, keep.diss = TRUE)
clusterObject2 <- pam(distMat2, k = 5, diss = TRUE, keep.diss = TRUE)

# 3) assign cluster IDs back onto the data as new columns
mtcars$clusters1 <- clusterObject1$clustering
mtcars$clusters2 <- clusterObject2$clustering

# 4) view cluster IDs (widen the console so rows don't wrap)
options(width = 160)
mtcars
# 5) visualize clusters in the first two principal components
# install.packages("fpc", dep = TRUE)
library(fpc)
clusplot(clusterObject1, color = TRUE, shade = TRUE, labels = 2, lines = 0,
         cex = 0.7,
         main = "Principal Components Analysis of k-medoids Partitions")

# 6) visualize the distance matrix via three dimension-reduction methods

# a) principal components analysis
pca <- prcomp(distMat1)
pca$x[duplicated(pca$x[, 1]), 1]  # show duplicate values on PC1

# b) isometric feature mapping (isomap) ordination;
#    may need to adjust k if the data are fragmented
library(vegan)
iso <- isomap(distMat1, k = 10)

# c) classical (metric) multidimensional scaling to 2 dimensions
mds <- cmdscale(d = distMat1, k = 2)
# d) plot the Voronoi tessellation of each 2-D embedding, overlaying the
#    points and their row labels; duplicate = "remove" drops coincident
#    points that would otherwise break the tessellation
# install.packages("tripack", dep = TRUE)
library(tripack)

# pca
plot(voronoi.mosaic(pca$x[, 1], pca$x[, 2], duplicate = "remove"))
points(pca$x, pch = 13, col = "red")
text(pca$x, labels = rownames(pca$x), pos = 3, offset = 0.5, cex = 0.7,
     col = "red")

# isomap
plot(voronoi.mosaic(iso$points[, 1], iso$points[, 2], duplicate = "remove"))
points(iso$points, pch = 13, col = "red")
text(iso$points, labels = rownames(iso$points), pos = 3, offset = 0.5,
     cex = 0.7, col = "red")

# mds
plot(voronoi.mosaic(mds[, 1], mds[, 2], duplicate = "remove"))
points(mds, pch = 13, col = "red")
text(mds, labels = rownames(mds), pos = 3, offset = 0.5, cex = 0.7,
     col = "red")
# 7) cluster using Euclidean and Mahalanobis distances

# a) Euclidean distance
#    (fixed: this header line was missing its leading "#", which made it a
#    syntax error)
# either directly:
k_Euclid <- pam(mtcars, 5, metric = "euclidean")
# or getting the distance matrix separately and then feeding it to pam()
Euclid_mat <- daisy(mtcars, metric = "euclidean")
k_Euclid2 <- pam(Euclid_mat, 5, diss = TRUE)

# b) Mahalanobis distance; each row is its own group, so the pairwise
#    group distances form a full row-by-row distance matrix
# install.packages("HDMD", dep = TRUE)
library(HDMD)
Mahal <- pairwise.mahalanobis(mtcars, grouping = rownames(mtcars), cov(mtcars))
library(cluster)
k_Mahal <- pam(Mahal$distance, k = 5, diss = TRUE, keep.diss = TRUE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment