# How different definitions of distance change our view of clustering
# You can run this script with the following R command:
# source('https://gist.githubusercontent.com/tdunning/badb88043d41d916a3148c669f2fb0cd/raw/8d3289fdbf2a7999bd5d9687002488b904e1d82f/viewpoints.r')
# Fix the RNG seed so every run draws exactly the same points.
# NOTE: the rnorm() calls below must stay in this order (noise, xy, c1),
# otherwise the seeded random stream is consumed differently.
set.seed(1)

# 2000 points x 8 coordinates of pure standard-normal noise
# (4 * 8 * 500 = 16000 draws); these carry no cluster structure.
noise <- matrix(rnorm(4 * 8 * 500), nrow = 2000, ncol = 8)

# Cluster centres for the two informative coordinates, in units of 8:
#   column 1: -1 for rows 1-1000, +1 for rows 1001-2000
#   column 2: alternates -1 / +1 in runs of 500 rows
# giving four groups of 500 points centred at (+/-8, +/-8).
offsets <- matrix(
  c(
    rep(-1, 1000), rep(1, 1000),
    rep(-1, 500), rep(1, 500), rep(-1, 500), rep(1, 500)
  ),
  ncol = 2
)

# Informative coordinates: unit-variance noise around the four centres.
xy <- matrix(rnorm(2 * 2000), nrow = 2000, ncol = 2) + offsets * 8

# Full 10-dimensional data set: 2 informative columns followed by 8 noise
# columns.
x <- cbind(xy, noise)

# Four 10 x 2 projection matrices; x %*% cN gives a 2-D view of the data.
# c1: zero weight on the informative columns, random weights on the noise
#     columns.
c1 <- rbind(matrix(0, nrow = 2, ncol = 2), matrix(rnorm(2 * 8), nrow = 8))
# c2: output axis 1 = column 1 of x, output axis 2 = column 3 (first noise
#     column).
c2 <- rbind(
  matrix(c(1, 0, 0, 0, 0, 1), nrow = 3, byrow = TRUE),
  matrix(0, nrow = 7, ncol = 2)
)
# c3: output axis 1 = 0.5 * (column 1 - column 2), output axis 2 = 0.8 *
#     column 3.
c3 <- rbind(
  matrix(c(0.5, 0, -0.5, 0, 0, 0.8), nrow = 3, byrow = TRUE),
  matrix(0, nrow = 7, ncol = 2)
)
# c4: keeps exactly the two informative columns (ones on the leading
#     diagonal).
c4 <- diag(nrow = 10, ncol = 2)

# 2 x 2 grid of panels with thin margins. The settings stay in effect on the
# current graphics device after this script finishes.
par(mar = c(1, 1, 1, 1), mfrow = c(2, 2))
# Draw one scatter panel of translucent points on a fixed square window.
#
# Args:
#   data: the points to draw, passed straight to plot(); callers in this
#         script pass a 2-column matrix (column 1 = x axis, column 2 = y axis).
#   lim:  half-width of the plotting window; both axes span c(-lim, lim).
#         Defaults to 12, so every panel shares the same scale.
#
# Returns:
#   Whatever plot() returns (NULL, invisibly); called for its side effect.
show <- function(data, lim = 12) {
  axis_range <- c(-lim, lim)
  plot(
    data,
    # pch 21 is a filled circle: hide the outline (col = NA) and use a 20%
    # opaque black fill so dense regions appear darker.
    col = NA, bg = rgb(0, 0, 0, 0.2), pch = 21,
    # No axis labels or tick marks: only the shape of the cloud matters.
    xlab = NA, ylab = NA,
    xlim = axis_range, ylim = axis_range,
    xaxt = "n", yaxt = "n"
  )
}
# Render the same 10-dimensional data set under each projection, one panel per
# projection matrix, in the order c1, c2, c3, c4.
for (projection in list(c1, c2, c3, c4)) {
  show(x %*% projection)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment