Last active
December 26, 2015 00:09
-
-
Save vsoch/7061899 to your computer and use it in GitHub Desktop.
R Functions in the Cloud!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This function will use plotly to create an online boxplot! | |
plotlyBox = function(username,key,datastore) { | |
# Username is your plotly username | |
# API key is found via plot.ly, login --> Access plotly --> settings | |
# datalist is a list of data | |
# The number of plots, N, is determined by length(N) | |
# Eg, put your data into a list | |
# datastore = list() | |
# datastore[[1]] = boxplot1 | |
# datastore[[2]] = boxplot2 | |
# Other packages that you need installed! | |
# Rcurl (for postForm) | |
# RJSONIO (for toJSON) | |
# You can install all with devtools: | |
# install.packages('devtools') | |
# library('devtools') | |
# devtools::install_github("plotly/R-api") | |
# Plotly functions code lovely and available at: | |
# https://plot.ly/r/script-demos/spectral-density-box-plot-example/ | |
# ------------------------------------------------ | |
# Install plotly (or re-install, heh) | |
require('devtools') | |
require('plotly') | |
py <- plotly(username=username, key=key) | |
# Define number of boxes | |
N = length(datastore) | |
# Source makeColors function to define boxplot colors | |
source('http://tinyurl.com/vsochR') | |
# Define colors | |
col = makeColors(N) | |
# Now package up colors and data! | |
package = lapply( | |
seq(N), | |
function(i){ | |
list( | |
y = datastore[[i]],type = 'box', marker=list(color = col[i][[1]][1]) | |
) | |
} | |
) | |
# Define the layout | |
layout <- list( | |
xaxis = list( | |
showgrid = FALSE, | |
zeroline = FALSE, | |
tickangle = 60, | |
showticklabels = FALSE | |
), | |
yaxis = list( | |
zeroline = FALSE, | |
gridcolor = 'white' | |
), | |
paper_bgcolor = 'rgb(233,233,233)', | |
plot_bgcolor = 'rgb(233,233,233)', | |
showlegend = FALSE | |
) | |
# Now send to plotly! | |
response <- py$plotly(package, kwargs = list(layout=layout)) | |
return(response) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-- PREPROCESSING------------------------------------------------ | |
# Normalize data | |
normalize <- function(x) { | |
x <- sweep(x, 2, apply(x, 2, min)) | |
sweep(x, 2, apply(x, 2, max), "/") | |
} | |
# Merge data based on row name and drop nonshared rows | |
mergeByRows = function(data1,data2){ | |
# Find values shared by both data | |
shared.names = intersect(rownames(data1),rownames(data2)) | |
data1.sub = data1[which(rownames(data1) %in% shared.names),] | |
data2.sub = data2[which(rownames(data2) %in% shared.names),] | |
data = cbind(data1.sub,data2.sub) | |
return(data) | |
} | |
#-K-MEANS CLUSTERING -------------------------------------------- | |
# Run kmeans and visualize error against number of clusters | |
runKmeans = function(data) { | |
require(graphics) | |
x = normalize(data) | |
# Determine number of clusters | |
wss <- (nrow(x)-1)*sum(apply(x,2,var)) | |
for (i in 2:15) wss[i] <- sum(kmeans(x, centers=i)$withinss) | |
plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares") | |
# cluster based on 2,3,4,5 classes dimensional example | |
for (k in 1:5) { | |
(cl <- kmeans(x, k)) | |
points(cl$centers, col = 1:2, pch = 8, cex = 2) | |
} | |
} | |
# Run kmeans and color based on a particular metric | |
runKmeansColor = function(data,colorBy) { | |
require(graphics) | |
# Normalize data | |
normalize <- function(x) { | |
x <- sweep(x, 2, apply(x, 2, min)) | |
sweep(x, 2, apply(x, 2, max), "/") | |
} | |
x = normalize(data) | |
# Determine number of clusters | |
wss <- (nrow(x)-1)*sum(apply(x,2,var)) | |
for (i in 2:15) wss[i] <- sum(kmeans(x, centers=i)$withinss) | |
plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares") | |
# cluster based on 2,3,4,5 classes dimensional example | |
for (k in 1:5) { | |
(cl <- kmeans(x, k)) | |
plot(colorBy, col = cl$cluster) | |
points(cl$centers, col = 1:2, pch = 8, cex = 2) | |
} | |
} | |
#-HIERARCHICAL CLUSTERING ----------------------------------------- | |
# Standard Hierarchical Clustering | |
# data matrix, number of clusters, and labels | |
WardHierCluster = function(data,numclus,lab) { | |
# Ward Hierarchical Clustering | |
d <- dist(data, method = "euclidean") # distance matrix | |
fit <- hclust(d, method="ward") | |
plot(fit,main="Clustering of Cortical Metrics",labels=lab) # display dendogram | |
groups <- cutree(fit, k=numclus) # cut tree into 6 clusters | |
# draw dendogram with red borders around the 5 clusters | |
rect.hclust(fit, k=numclus, border="red") | |
return(fit) | |
} | |
# Sparse Hierarchical clustering (sparcl) | |
# This function returns a primary and complementary sparse hierarchical clustering, | |
# colored by some labels, y. You should next see if the groups have biological validity | |
runSparclHier = function(x,y) { | |
# Load sparcl library | |
library('sparcl') | |
# Do tuning parameter selection for sparse hierarchical clustering | |
perm.out <- HierarchicalSparseCluster.permute(as.matrix(x), wbounds=c(1.5,2:6),nperms=5) | |
print(perm.out) | |
plot(perm.out) | |
# Perform sparse hierarchical clustering | |
sparsehc <- HierarchicalSparseCluster(dists=perm.out$dists,wbound=perm.out$bestw, method="complete") | |
par(mfrow=c(1,2)) | |
plot(sparsehc) | |
plot(sparsehc$hc, labels=rep("", length(y))) | |
print(sparsehc) | |
# Plot using knowledge of class labels in order to compare true class | |
# labels to clustering obtained | |
par(mfrow=c(1,1)) | |
ColorDendrogram(sparsehc$hc,y=y,main="Cortical Morphometry Clustering",branchlength=.007) | |
# Now look for secondary clustering after removing signal from the first | |
sparsehc.comp <- HierarchicalSparseCluster(as.matrix(x),wbound=perm.out$bestw,method="complete",uorth=sparsehc$u) | |
# Redo the analysis, but this time use "absolute value" dissimilarity: | |
perm.out <- HierarchicalSparseCluster.permute(as.matrix(x), wbounds=c(1.5,2:6),nperms=5, dissimilarity="absolute.value") | |
print(perm.out) | |
plot(perm.out) | |
# Perform sparse hierarchical clustering | |
sparsehc.comp <- HierarchicalSparseCluster(dists=perm.out$dists, wbound=perm.out$bestw, method="complete", dissimilarity="absolute.value") | |
par(mfrow=c(1,2)) | |
plot(sparsehc.comp) | |
toreturn = list("sparsehc" = sparsehc, "comphc" = sparsehc.comp, "label" = y) | |
return(toreturn) | |
} | |
# - CLUSTERING VALIDATION----------------------------------------------- | |
# EVALUATE SPARCL CLUSTERING | |
evalSparcl = function(sparcl,y) { | |
library('clValid') | |
# First validate with internal metrics | |
internal <- clValid(sparcl$u,validation="internal",neighbSize=50, nClust=2:20) | |
summary(internal) | |
plot(internal) | |
# Validate biologically - first create two labels based on Rx | |
fc <- tapply(rownames(sparcl$u),y, c) | |
bio <- clValid(sparcl$u, nClust = 2,validation="biological", annotation=fc) | |
optimalScores(bio) | |
toreturn = list("bio" = bio, "internal" = internal) | |
return(toreturn) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Some useful functions to remember for R! | |
# 1) DELETE FROM A DATA FRAME | |
# First define the indices that you want to delete | |
delete = c(16,25,26,73) | |
# Now use subset to do it | |
df = subset(df, select = -delete ) | |
# - MAKE COLORS FOR N PLOTS ------------------------------------------------- | |
makeColors = function(N) { | |
cols = lapply(seq(0,360,length = N), | |
function(i){ | |
paste('hsl(',as.numeric(i),',50%,50%)',sep='') | |
} | |
) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment