Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xiaodaigh/7803454 to your computer and use it in GitHub Desktop.
Save xiaodaigh/7803454 to your computer and use it in GitHub Desktop.
Visual variable clustering
data <- read.csv("c:/bb2.csv")
cor.data <- cor(data[sapply(data,typeof)=="double"],method="spearman")
dist <- function(y,x) {
if(is.null(dim(x))) {
sqrt((x[1] - y[1])^2 + (x[2] - y[2])^2)
} else {
sqrt((x[,1] - y[1])^2 + (x[,2] - y[2])^2)
}
}
find <- function(coor,d,pt = 2*runif(2)-1,conv = 0.01) {
#browser()
actual_dist <- dist(pt,coor)
r <- d/actual_dist
r <- ifelse(r == Inf,1,r)
pts <- diag(r) %*% matrix(rep(pt,nrow(coor)),ncol=2,byrow=TRUE)
candidate_pts <- pts - diag(r - 1)%*%coor
new_pt <- apply(candidate_pts,2,weighted.mean,abs(r-1))
#print(new_pt)
#browser()
if(all(abs(new_pt - pt < conv))) return(round(new_pt,2))
else return(find(coor,d,pt = new_pt,conv))
}
ndim <- dim(cor.data)[1]
coor <- rbind(c(0,0),c(0,cor.data[1,2]))
nr <- nrow(coor)
while(nr < ndim ) {
d <- 1 - abs(cor.data[1:nr,nr + 1])
new_pt <- find(coor,d)
coor <- as.matrix(rbind(coor,new_pt))
nr <- nrow(coor)
}
plot(coor,col=kmeans(coor,20)$cluster)
abs.cor.data <- abs(cor.data)
all.dist <- apply(coor,1,dist,coor)
mad <- max(all.dist)
measure <- sum((all.dist - (1-abs.cor.data))^2)
mad_backup = Inf
measure_backup =Inf
coor_backup <- coor
while(measure < measure_backup) {
coor_backup <- coor
mad_backup <- mad
measure_backup <- measure
for(i in 3:ndim){
d <- 1 - abs(cor.data[(1:ndim)[-i],i])
coor[i,] <- find(coor[-i,],d,coor[i,])
}
all.dist <- apply(coor,1,dist,coor)
mad <- max(all.dist)
measure <- sum((all.dist - (1-abs.cor.data))^2)
print(measure)
plot(coor,col=kmeans(coor,20)$cluster)
}
coor <- coor_backup
mad_backup <- Inf
while(mad < mad_backup) {
i <- sort(which(all.dist == mad,arr.ind=TRUE)[1,])[1]
d <- 1 - abs(cor.data[(1:ndim)[-i],i])
coor_backup <- coor
mad_backup <- mad
coor[i,] <- find(coor[-i,],d,coor[i,])
all.dist <- apply(coor,1,dist,coor)
mad <- max(all.dist)
print(mad)
plot(coor,col=kmeans(coor,20)$cluster)
}
coor <- coor_backup
plot(coor,col=kmeans(coor,12)$cluster)
obj<-kmeans(coor,12)
text(obj$centers[,1],y=obj$centers[,2],1:12)
n <- names(data)
nn <- c(n[obj$cluster==1][1],n[obj$cluster==3][1])
cor(data[nn],method="spearman")
a <- function(i) {
kmeans(coor,i)$tot.withinss
}
aa <- sapply(1:round(ndim/2,0),a)
daa <- diff(aa)
rdaa <- rev(daa)
daa1 <- cumsum(rdaa)/seq(1,length(rdaa))
diff(daa1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment