vsoch/plotly.R

## plotly.R
# This function will use plotly to create an online boxplot!

plotlyBox = function(username,key,datastore) {

  # Username is your plotly username
  # API key is found via plot.ly, login --> Access plotly --> settings
  # datalist is a list of data
  # The number of plots, N, is determined by length(N)

  # Eg, put your data into a list
  # datastore = list()
  # datastore[[1]] = boxplot1
  # datastore[[2]] = boxplot2

  # Other packages that you need installed!
  # Rcurl (for postForm)
  # RJSONIO (for toJSON)

  # You can install all with devtools:
  # install.packages('devtools')
  # library('devtools')
  # devtools::install_github("plotly/R-api")

  # Plotly functions code lovely and available at:
  # https://plot.ly/r/script-demos/spectral-density-box-plot-example/

  # ------------------------------------------------

  # Install plotly (or re-install, heh)
  require('devtools')
  require('plotly')

  py <- plotly(username=username, key=key)

  # Define number of boxes
  N = length(datastore)

  # Source makeColors function to define boxplot colors
  source('http://tinyurl.com/vsochR')

  # Define colors
  col = makeColors(N)

  # Now package up colors and data!
  package = lapply(
    seq(N),
    function(i){
      list(
        y = datastore[[i]],type = 'box', marker=list(color = col[i][[1]][1])
      )
    }
  )

  # Define the layout
  layout <- list(
    xaxis = list(
      showgrid = FALSE,
      zeroline = FALSE,
      tickangle = 60,
      showticklabels = FALSE
    ),
    yaxis = list(
      zeroline = FALSE,
      gridcolor = 'white'
    ),
    paper_bgcolor = 'rgb(233,233,233)',
    plot_bgcolor = 'rgb(233,233,233)',
    showlegend = FALSE
  )

  # Now send to plotly!
  response <- py$plotly(package, kwargs = list(layout=layout))
  return(response)

}

## rCloud.R
#-- PREPROCESSING------------------------------------------------

# Normalize data

normalize <- function(x) {
  x <- sweep(x, 2, apply(x, 2, min))
  sweep(x, 2, apply(x, 2, max), "/")
}

# Merge data based on row name and drop nonshared rows
mergeByRows = function(data1,data2){

  # Find values shared by both data
  shared.names = intersect(rownames(data1),rownames(data2))
  data1.sub = data1[which(rownames(data1) %in% shared.names),]
  data2.sub = data2[which(rownames(data2) %in% shared.names),]
  data = cbind(data1.sub,data2.sub)
  return(data)

}

#-K-MEANS CLUSTERING --------------------------------------------

# Run kmeans and visualize error against number of clusters

runKmeans = function(data) {

  require(graphics)

  x = normalize(data)

  # Determine number of clusters
  wss <- (nrow(x)-1)*sum(apply(x,2,var))
  for (i in 2:15) wss[i] <- sum(kmeans(x, centers=i)$withinss)
  plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares")

  # cluster based on 2,3,4,5 classes dimensional example
  for (k in 1:5) {
    (cl <- kmeans(x, k))
    points(cl$centers, col = 1:2, pch = 8, cex = 2)
  }

}

# Run kmeans and color based on a particular metric

runKmeansColor = function(data,colorBy) {

  require(graphics)

  # Normalize data
  normalize <- function(x) {
    x <- sweep(x, 2, apply(x, 2, min))
    sweep(x, 2, apply(x, 2, max), "/")
  }

  x = normalize(data)

  # Determine number of clusters
  wss <- (nrow(x)-1)*sum(apply(x,2,var))
  for (i in 2:15) wss[i] <- sum(kmeans(x, centers=i)$withinss)
  plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares")

  # cluster based on 2,3,4,5 classes dimensional example
  for (k in 1:5) {
    (cl <- kmeans(x, k))
    plot(colorBy, col = cl$cluster)
    points(cl$centers, col = 1:2, pch = 8, cex = 2)
  }

}

#-HIERARCHICAL CLUSTERING -----------------------------------------

# Standard Hierarchical Clustering
# data matrix, number of clusters, and labels

WardHierCluster = function(data,numclus,lab) {
  # Ward Hierarchical Clustering
  d <- dist(data, method = "euclidean") # distance matrix
  fit <- hclust(d, method="ward")
  plot(fit,main="Clustering of Cortical Metrics",labels=lab) # display dendogram
  groups <- cutree(fit, k=numclus) # cut tree into 6 clusters
  # draw dendogram with red borders around the 5 clusters
  rect.hclust(fit, k=numclus, border="red")
  return(fit)
}

# Sparse Hierarchical clustering (sparcl)
# This function returns a primary and complementary sparse hierarchical clustering,
# colored by some labels, y.  You should next see if the groups have biological validity

runSparclHier = function(x,y) {
  # Load sparcl library
  library('sparcl')

  # Do tuning parameter selection for sparse hierarchical clustering
  perm.out <- HierarchicalSparseCluster.permute(as.matrix(x), wbounds=c(1.5,2:6),nperms=5)
  print(perm.out)
  plot(perm.out)

  # Perform sparse hierarchical clustering
  sparsehc <- HierarchicalSparseCluster(dists=perm.out$dists,wbound=perm.out$bestw, method="complete")

  par(mfrow=c(1,2))
  plot(sparsehc)
  plot(sparsehc$hc, labels=rep("", length(y)))
  print(sparsehc)

  # Plot using knowledge of class labels in order to compare true class
  #   labels to clustering obtained
  par(mfrow=c(1,1))
  ColorDendrogram(sparsehc$hc,y=y,main="Cortical Morphometry Clustering",branchlength=.007)

  # Now look for secondary clustering after removing signal from the first
  sparsehc.comp <- HierarchicalSparseCluster(as.matrix(x),wbound=perm.out$bestw,method="complete",uorth=sparsehc$u)

  # Redo the analysis, but this time use "absolute value" dissimilarity:
  perm.out <- HierarchicalSparseCluster.permute(as.matrix(x), wbounds=c(1.5,2:6),nperms=5, dissimilarity="absolute.value")
  print(perm.out)
  plot(perm.out)

  # Perform sparse hierarchical clustering
  sparsehc.comp <- HierarchicalSparseCluster(dists=perm.out$dists, wbound=perm.out$bestw, method="complete", dissimilarity="absolute.value")
  par(mfrow=c(1,2))
  plot(sparsehc.comp)

  toreturn = list("sparsehc" = sparsehc, "comphc" = sparsehc.comp, "label" = y)
  return(toreturn)

}

# - CLUSTERING VALIDATION-----------------------------------------------

# EVALUATE SPARCL CLUSTERING
evalSparcl = function(sparcl,y) {

  library('clValid')

  # First validate with internal metrics

  internal <- clValid(sparcl$u,validation="internal",neighbSize=50, nClust=2:20)
  summary(internal)
  plot(internal)

  # Validate biologically - first create two labels based on Rx
  fc <- tapply(rownames(sparcl$u),y, c)
  bio <- clValid(sparcl$u, nClust = 2,validation="biological", annotation=fc)
  optimalScores(bio)

  toreturn = list("bio" = bio, "internal" = internal)
  return(toreturn)

}

## tidbitR.R
# Some useful functions to remember for R!

# 1) DELETE FROM A DATA FRAME

# First define the indices that you want to delete
delete = c(16,25,26,73)
# Now use subset to do it
df = subset(df, select = -delete )


# - MAKE COLORS FOR N PLOTS -------------------------------------------------

makeColors = function(N) {
    cols = lapply(seq(0,360,length = N),
		function(i){
			paste('hsl(',as.numeric(i),',50%,50%)',sep='')
		}
	)
}
	# This function will use plotly to create an online boxplot!

	plotlyBox = function(username,key,datastore) {

	# Username is your plotly username
	# API key is found via plot.ly, login --> Access plotly --> settings
	# datalist is a list of data
	# The number of plots, N, is determined by length(N)

	# Eg, put your data into a list
	# datastore = list()
	# datastore[[1]] = boxplot1
	# datastore[[2]] = boxplot2

	# Other packages that you need installed!
	# Rcurl (for postForm)
	# RJSONIO (for toJSON)

	# You can install all with devtools:
	# install.packages('devtools')
	# library('devtools')
	# devtools::install_github("plotly/R-api")

	# Plotly functions code lovely and available at:
	# https://plot.ly/r/script-demos/spectral-density-box-plot-example/

	# ------------------------------------------------

	# Install plotly (or re-install, heh)
	require('devtools')
	require('plotly')

	py <- plotly(username=username, key=key)

	# Define number of boxes
	N = length(datastore)

	# Source makeColors function to define boxplot colors
	source('http://tinyurl.com/vsochR')

	# Define colors
	col = makeColors(N)

	# Now package up colors and data!
	package = lapply(
	seq(N),
	function(i){
	list(
	y = datastore[[i]],type = 'box', marker=list(color = col[i][[1]][1])
	)
	}
	)

	# Define the layout
	layout <- list(
	xaxis = list(
	showgrid = FALSE,
	zeroline = FALSE,
	tickangle = 60,
	showticklabels = FALSE
	),
	yaxis = list(
	zeroline = FALSE,
	gridcolor = 'white'
	),
	paper_bgcolor = 'rgb(233,233,233)',
	plot_bgcolor = 'rgb(233,233,233)',
	showlegend = FALSE
	)

	# Now send to plotly!
	response <- py$plotly(package, kwargs = list(layout=layout))
	return(response)

	}
	#-- PREPROCESSING------------------------------------------------

	# Normalize data

	normalize <- function(x) {
	x <- sweep(x, 2, apply(x, 2, min))
	sweep(x, 2, apply(x, 2, max), "/")
	}

	# Merge data based on row name and drop nonshared rows
	mergeByRows = function(data1,data2){

	# Find values shared by both data
	shared.names = intersect(rownames(data1),rownames(data2))
	data1.sub = data1[which(rownames(data1) %in% shared.names),]
	data2.sub = data2[which(rownames(data2) %in% shared.names),]
	data = cbind(data1.sub,data2.sub)
	return(data)

	}

	#-K-MEANS CLUSTERING --------------------------------------------

	# Run kmeans and visualize error against number of clusters

	runKmeans = function(data) {

	require(graphics)

	x = normalize(data)

	# Determine number of clusters
	wss <- (nrow(x)-1)*sum(apply(x,2,var))
	for (i in 2:15) wss[i] <- sum(kmeans(x, centers=i)$withinss)
	plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares")

	# cluster based on 2,3,4,5 classes dimensional example
	for (k in 1:5) {
	(cl <- kmeans(x, k))
	points(cl$centers, col = 1:2, pch = 8, cex = 2)
	}

	}

	# Run kmeans and color based on a particular metric

	runKmeansColor = function(data,colorBy) {

	require(graphics)

	# Normalize data
	normalize <- function(x) {
	x <- sweep(x, 2, apply(x, 2, min))
	sweep(x, 2, apply(x, 2, max), "/")
	}

	x = normalize(data)

	# Determine number of clusters
	wss <- (nrow(x)-1)*sum(apply(x,2,var))
	for (i in 2:15) wss[i] <- sum(kmeans(x, centers=i)$withinss)
	plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares")

	# cluster based on 2,3,4,5 classes dimensional example
	for (k in 1:5) {
	(cl <- kmeans(x, k))
	plot(colorBy, col = cl$cluster)
	points(cl$centers, col = 1:2, pch = 8, cex = 2)
	}

	}

	#-HIERARCHICAL CLUSTERING -----------------------------------------

	# Standard Hierarchical Clustering
	# data matrix, number of clusters, and labels

	WardHierCluster = function(data,numclus,lab) {
	# Ward Hierarchical Clustering
	d <- dist(data, method = "euclidean") # distance matrix
	fit <- hclust(d, method="ward")
	plot(fit,main="Clustering of Cortical Metrics",labels=lab) # display dendogram
	groups <- cutree(fit, k=numclus) # cut tree into 6 clusters
	# draw dendogram with red borders around the 5 clusters
	rect.hclust(fit, k=numclus, border="red")
	return(fit)
	}

	# Sparse Hierarchical clustering (sparcl)
	# This function returns a primary and complementary sparse hierarchical clustering,
	# colored by some labels, y. You should next see if the groups have biological validity

	runSparclHier = function(x,y) {
	# Load sparcl library
	library('sparcl')

	# Do tuning parameter selection for sparse hierarchical clustering
	perm.out <- HierarchicalSparseCluster.permute(as.matrix(x), wbounds=c(1.5,2:6),nperms=5)
	print(perm.out)
	plot(perm.out)

	# Perform sparse hierarchical clustering
	sparsehc <- HierarchicalSparseCluster(dists=perm.out$dists,wbound=perm.out$bestw, method="complete")

	par(mfrow=c(1,2))
	plot(sparsehc)
	plot(sparsehc$hc, labels=rep("", length(y)))
	print(sparsehc)

	# Plot using knowledge of class labels in order to compare true class
	# labels to clustering obtained
	par(mfrow=c(1,1))
	ColorDendrogram(sparsehc$hc,y=y,main="Cortical Morphometry Clustering",branchlength=.007)

	# Now look for secondary clustering after removing signal from the first
	sparsehc.comp <- HierarchicalSparseCluster(as.matrix(x),wbound=perm.out$bestw,method="complete",uorth=sparsehc$u)

	# Redo the analysis, but this time use "absolute value" dissimilarity:
	perm.out <- HierarchicalSparseCluster.permute(as.matrix(x), wbounds=c(1.5,2:6),nperms=5, dissimilarity="absolute.value")
	print(perm.out)
	plot(perm.out)

	# Perform sparse hierarchical clustering
	sparsehc.comp <- HierarchicalSparseCluster(dists=perm.out$dists, wbound=perm.out$bestw, method="complete", dissimilarity="absolute.value")
	par(mfrow=c(1,2))
	plot(sparsehc.comp)

	toreturn = list("sparsehc" = sparsehc, "comphc" = sparsehc.comp, "label" = y)
	return(toreturn)

	}

	# - CLUSTERING VALIDATION-----------------------------------------------

	# EVALUATE SPARCL CLUSTERING
	evalSparcl = function(sparcl,y) {

	library('clValid')

	# First validate with internal metrics

	internal <- clValid(sparcl$u,validation="internal",neighbSize=50, nClust=2:20)
	summary(internal)
	plot(internal)

	# Validate biologically - first create two labels based on Rx
	fc <- tapply(rownames(sparcl$u),y, c)
	bio <- clValid(sparcl$u, nClust = 2,validation="biological", annotation=fc)
	optimalScores(bio)

	toreturn = list("bio" = bio, "internal" = internal)
	return(toreturn)

	}
	# Some useful functions to remember for R!

	# 1) DELETE FROM A DATA FRAME

	# First define the indices that you want to delete
	delete = c(16,25,26,73)
	# Now use subset to do it
	df = subset(df, select = -delete )


	# - MAKE COLORS FOR N PLOTS -------------------------------------------------

	makeColors = function(N) {
	cols = lapply(seq(0,360,length = N),
	function(i){
	paste('hsl(',as.numeric(i),',50%,50%)',sep='')
	}
	)
	}