Skip to content

Instantly share code, notes, and snippets.

@coppeliaMLA
Last active August 29, 2015 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save coppeliaMLA/92e2ae4c32b0ab9b6196 to your computer and use it in GitHub Desktop.
Save coppeliaMLA/92e2ae4c32b0ab9b6196 to your computer and use it in GitHub Desktop.
Generates the data for comparing two clusterings (two cluster trees cut into the same number of clusters) using a Sankey diagram
#Build the node and link JSON needed to compare two clusterings in a Sankey
#diagram.
#
#Arguments:
#  cl1, cl2  Trees accepted by cutree() (e.g. hclust results). Individuals are
#            matched between the two trees by the names of the cutree()
#            result; when a tree carries no labels, positions ("1", "2", ...)
#            are used instead.
#  num.clus  Number of clusters each tree is cut into.
#
#Value:
#  A list with two elements, each being whatever toJSON() returns (toJSON is
#  looked up when the function runs; this file attaches rjson for it):
#    nodes  one object per cluster: "name" is "<tree>.<cluster>" (tree is 1 or
#           2) and "ind" holds its members joined by " <br> ".
#    edges  one object per pair of a tree-1 cluster and a tree-2 cluster that
#           share members: "source" and "target" are zero-based positions in
#           nodes, "value" is the number of shared members.
clusComp <- function(cl1, cl2, num.clus) {
  #Serialise a data frame to JSON in one of three layouts
  dfToJSON <- function(df, mode = "vector") {
    #Pair up two columns as a list of {x, y} coordinates
    colToList <- function(x, y) {
      lapply(seq_along(x), function(i) list(x = x[i], y = y[i]))
    }
    for.json <- switch(mode,
      #One {name, data} object per column
      vector = lapply(seq_len(ncol(df)), function(j) {
        list(name = colnames(df)[j], data = df[, j])
      }),
      #First column is x; every other column becomes a series of {x, y}
      coords = lapply(seq_len(ncol(df))[-1], function(j) {
        list(name = colnames(df)[j],
             data = colToList(x = df[, 1], y = df[, j]))
      }),
      #One object per row
      rowToObject = lapply(seq_len(nrow(df)), function(j) df[j, ]),
      stop("Unknown mode: ", mode, call. = FALSE)
    )
    toJSON(for.json)
  }

  #Membership table for one tree: individual id and "<step>.<cluster>" label
  memberTable <- function(ct, step) {
    ids <- names(ct)
    if (is.null(ids)) {
      #Unlabelled tree: fall back to positions so the join below still works
      ids <- as.character(seq_along(ct))
    }
    data.frame(ind = ids, cluster = paste0(step, ".", ct),
               stringsAsFactors = FALSE)
  }

  add.1 <- memberTable(cutree(cl1, k = num.clus), 1)
  add.2 <- memberTable(cutree(cl2, k = num.clus), 2)
  clus.change <- rbind(add.1, add.2)

  #Create the nodes: one per cluster label, in sorted label order, members
  #listed in the order they appear in the tree
  members <- split(clus.change$ind, clus.change$cluster)
  nodes.df <- data.frame(
    name = names(members),
    ind = unname(vapply(members, paste0, character(1), collapse = " <br> ")),
    stringsAsFactors = FALSE
  )
  n <- dfToJSON(nodes.df, "rowToObject")

  #Create the edges: join the two trees on the individual, then count how
  #many individuals follow each (tree-1 cluster, tree-2 cluster) route
  paired <- merge(add.1, add.2, by = "ind")
  paired <- paired[order(paired$cluster.x, paired$cluster.y), , drop = FALSE]
  #Labels contain no spaces, so the pasted key identifies a route uniquely
  route <- paste(paired$cluster.x, paired$cluster.y)
  first <- !duplicated(route)
  links <- data.frame(
    #Node positions are zero-based in the output
    source = match(paired$cluster.x[first], nodes.df$name) - 1,
    target = match(paired$cluster.y[first], nodes.df$name) - 1,
    #Rows are sorted by route, so run lengths are the per-route counts
    value = rle(route)$lengths
  )
  e <- dfToJSON(links, "rowToObject")

  list(nodes = n, edges = e)
}
library(rjson)
library(stringr)
library(reshape2)
#Cut both trees into 80 clusters and collect the node and edge JSON strings.
#cl1 and cl2 are not defined in this file as shown; they must already exist
#in the workspace.
cc <- clusComp(cl1, cl2, 80)
#Write a single JSON object; the edge list is stored under the "links" key
fileConn <- file("path/sankeyClustersTest.jsonp")
writeLines(
  paste0("{ \"nodes\":", cc$nodes, ", \"links\": ", cc$edges, "}"),
  con = fileConn
)
close(fileConn)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment