# Calculate K-medoids using the uncentered correlation metric
# (cosine of the angle) as a measure of distance
# 0) load data
data(mtcars)
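# note (a hedged aside): the cosine distance below is sensitive to the relative
# scale of the columns (disp and hp dominate mtcars on the raw scale); one
# option is to standardize first, e.g. mtcars_std <- as.data.frame(scale(mtcars)),
# though the rest of this script uses the raw data as-is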
# 1) create a distance matrix using the "cosine of the angle" method (aka, uncentered correlation)
# a) using the designdist() function in the vegan package
# install.packages("vegan", dep = TRUE)
library(vegan)
distMat1 <- designdist(mtcars, method = "1-J/sqrt(A*B)", terms = "quadratic",
name = "cosine.complement")
# b) using the distancematrix() function in the hopach package (Bioconductor)
# if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
# BiocManager::install("hopach")
library(hopach)
distMat2 <- as.matrix( distancematrix(mtcars, d = "cosangle", na.rm = TRUE) )
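# the two implementations should agree up to numerical precision, assuming
# hopach's "cosangle" uses the same 1 - cosine definition (a systematic offset
# here would indicate a rescaled variant)
range(as.matrix(distMat1) - distMat2)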
# 2) implement partitioning clustering using the distance matrix
# install.packages("cluster", dep = TRUE)
library(cluster)
clusterObject1 <- pam(distMat1, k = 5, diss = TRUE, keep.diss = TRUE)
clusterObject2 <- pam(distMat2, k = 5, diss = TRUE, keep.diss = TRUE)
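# k = 5 is assumed throughout; a common way to choose k is the average
# silhouette width (a sketch, not part of the original workflow)
sil_width <- sapply(2:10, function(k)
    pam(distMat1, k = k, diss = TRUE)$silinfo$avg.width)
plot(2:10, sil_width, type = "b", xlab = "k", ylab = "average silhouette width")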
# 3) assign cluster IDs to data
mtcars$clusters1 <- clusterObject1$clustering
mtcars$clusters2 <- clusterObject2$clustering
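# agreement check: the cross-tabulation of the two partitions should be close
# to a permutation matrix (the cluster labels themselves are arbitrary)
table(clusters1 = mtcars$clusters1, clusters2 = mtcars$clusters2)
clusterObject1$medoids # the cars chosen as cluster representatives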
# 4) view cluster IDs
options(width = 160)
mtcars
# 5) visualize clusters
# install.packages("fpc", dep = TRUE)
library(fpc)
clusplot(clusterObject1, color = TRUE, shade = TRUE, labels = 2, lines = 0, cex = 0.7,
main = "Principal Components Analysis of k-medoids Partitions")
# 6) visualize distance matrix
# a) dimension reduction, via principal components analysis
pca <- prcomp(distMat1)
pca$x[duplicated(pca$x[, 1]), 1] # show duplicate values
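# proportion of variance captured by the leading components, to gauge how
# faithful the 2-d Voronoi plots below will be
summary(pca)$importance[, 1:3]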
# b) dimension reduction, via isometric feature mapping ordination
library(vegan)
iso <- isomap(distMat1, k = 10) # may need to adjust the value of k if data are fragmented
# c) dimension reduction, via metric multidimensional scaling
mds <- cmdscale(d = distMat1, k = 2)
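# cmdscale() can also report a goodness-of-fit statistic for the 2-d embedding
# (values near 1 mean little distance information was lost)
mds_fit <- cmdscale(d = distMat1, k = 2, eig = TRUE)
mds_fit$GOF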
# d) plot the Voronoi tessellation
# install.packages("tripack", dep = TRUE)
library(tripack)
# pca
plot( voronoi.mosaic(pca$x[, 1], pca$x[, 2], duplicate = "remove") )
points(pca$x, pch = 13, col = "red")
text(pca$x, labels = rownames(pca$x), pos = 3, offset = 0.5, cex = 0.7, col = "red")
# isomap
plot( voronoi.mosaic(iso$points[, 1], iso$points[, 2], duplicate = "remove") )
points(iso$points, pch = 13, col = "red")
text(iso$points, labels = rownames(iso$points), pos = 3, offset = 0.5, cex = 0.7, col = "red")
# mds
plot( voronoi.mosaic(mds[, 1], mds[, 2], duplicate = "remove") )
points(mds, pch = 13, col = "red")
text(mds, labels = rownames(mds), pos = 3, offset = 0.5, cex = 0.7, col = "red")
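# optional: overlay the k-medoids assignment on one of the ordinations
# (a sketch; the same idea applies to the pca and isomap coordinates)
plot( voronoi.mosaic(mds[, 1], mds[, 2], duplicate = "remove") )
points(mds, pch = 19, col = clusterObject1$clustering)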
# 7) cluster using Euclidean and Mahalanobis distances
# a) Euclidean distance
# first drop the cluster ID columns appended in step 3, so they
# don't leak into the new distance calculations
carVars <- mtcars[, setdiff(names(mtcars), c("clusters1", "clusters2"))]
# either directly:
k_Euclid <- pam(carVars, 5, metric = "euclidean")
# or by computing the distance matrix separately and passing it to pam()
Euclid_mat <- daisy(carVars, metric = "euclidean")
k_Euclid2 <- pam(Euclid_mat, 5, diss = TRUE)
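# both routes should yield identical partitions, since daisy() with
# metric = "euclidean" reproduces pam()'s internal Euclidean distance
all.equal(k_Euclid$clustering, k_Euclid2$clustering, check.attributes = FALSE) # expect TRUE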
# b) Mahalanobis distance
# install.packages("HDMD", dep = TRUE)
library(HDMD)
Mahal <- pairwise.mahalanobis(carVars, grouping = rownames(carVars), cov(carVars)) # one group per row
library(cluster)
k_Mahal <- pam(Mahal$distance, k = 5, diss = TRUE, keep.diss = TRUE)
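# equivalent view (a sketch, with assumptions): with one group per row, the
# pairwise Mahalanobis distance is Euclidean distance after whitening by the
# inverse covariance; if Mahal$distance holds squared distances (worth
# verifying against the HDMD docs), pam() on the whitened data is a close
# relative of k_Mahal above, though the monotone squaring can shift medoids
W <- chol(solve(cov(carVars))) # so that t(W) %*% W = solve(cov(carVars))
k_Mahal2 <- pam(dist(as.matrix(carVars) %*% t(W)), k = 5, diss = TRUE)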