Skip to content

Instantly share code, notes, and snippets.

@dsparks
Created February 14, 2011 13:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dsparks/825881 to your computer and use it in GitHub Desktop.
Save dsparks/825881 to your computer and use it in GitHub Desktop.
Bootstrap simulations from data, to compare to empirical PCA, functions only.
require(ggplot2)
### Boot ###
DimensionTester <- function(object, niterations = 50, dimstotest = 0){
if(length(dimstotest) == 1){dimstotest <- 1:(ncol(object)*2)}
Parameters <- expand.grid(1:niterations, dimstotest)
DoPCA <- function(dims){
Out <- rep(NA, max(dimstotest))
ToScramble <- as.matrix(object[, sample(1:ncol(object), dims, replace = T)])
ToScramble <- apply(ToScramble, 2, function(cc){sample(cc, length(cc), replace = T)})
ToScramble[, apply(ToScramble, 2, sd) == 0] <- ToScramble[, apply(ToScramble, 2, sd) == 0] + rnorm(nrow(ToScramble)) # This "fixes" columns with no variance
BootPCA <- prcomp(ToScramble, center = T, scale. = T)
Out[1:dims] <- (BootPCA$sdev ^ 2) / sum(BootPCA$sdev ^ 2)
Out <- t(Out)
rownames(Out) <- dims
plot(c(Out))
return(Out)
}
BootSDs <- do.call(rbind, lapply(Parameters[, 2], DoPCA))
ENComponents <- apply(BootSDs, 1, function(x){sum(x, na.rm = T)^2 / sum(x^2, na.rm = T)})
ActualSD <- prcomp(object, center = T, scale. = T)$sdev ^ 2 / sum(prcomp(object, center = T, scale. = T)$sdev ^ 2)
Actual <- data.frame(Component = 1:length(ActualSD), Variance = ActualSD)
ENActual <- sum(ActualSD, na.rm = T)^2 / sum(ActualSD^2, na.rm = T)
MeanProximity <- c((ENActual - by(ENComponents, names(ENComponents), mean)))[unique(names(ENComponents))]
NearestIntegerDimension <- as.numeric(names(which.min(abs(MeanProximity))))
ToReturn <- vector("list")
ToReturn$BootSDs <- BootSDs
ToReturn$Actual <- Actual
ToReturn$ENComponents <- ENComponents
ToReturn$ENActual <- ENActual
ToReturn$MeanProximity <- MeanProximity
ToReturn$NearestIntegerDimension <- NearestIntegerDimension
return(ToReturn)
}
### Plot ###
DimensionTestPlotter <- function(dimtestobject){
Xs <- as.numeric(rownames(dimtestobject$BootSDs))
YBreaks <- unique(Xs)
YBreaks[which.min(abs(YBreaks - dimtestobject$ENActual))] <- dimtestobject$ENActual
YBreaks <- round(YBreaks, 2)
ColorList <- rep(gray(0), length(YBreaks))
ColorList[which.min(abs(YBreaks - dimtestobject$ENActual))] <- hsv(0/12, 7/12, 7/12)
PlotFrame <- data.frame(Xs, dimtestobject$ENComponents)
colnames(PlotFrame) <- c("Xs", "ENC")
ENPlot <- ggplot(data = PlotFrame, aes(x = Xs, y = ENC),
geom = "blank") + theme_bw()
ENPlot <- ENPlot + geom_hline(yintercept = dimtestobject$ENActual, colour = I(hsv(0/12, 7/12, 7/12)), lwd = I(1), alpha = I(7/12))
ENPlot <- ENPlot + geom_point(aes(x = Xs, y = ENC), alpha = I(1/(max(Xs)^(1/2))))
ENPlot <- ENPlot + scale_y_continuous("Effective Number of Components", breaks = YBreaks, labels = gsub(".00", "", YBreaks))
ENPlot <- ENPlot + scale_x_continuous("True Orthogonal Dimensions", breaks = unique(Xs))
ENPlot <- ENPlot + opts(axis.text.y = theme_text(size = 8, hjust = 1, colour = ColorList))
ENPlot <- ENPlot + opts(axis.text.x = theme_text(size = 8, vjust = 1))
ENPlot <- ENPlot + opts(title = "True Orthogonal Dimensional Equivalent")
return(ENPlot)
}
### Check plot ###
CheckPlot <- function(dimtestobject){
Xs <- as.numeric(rownames(dimtestobject$BootSDs))
YBreaks <- unique(Xs)
YBreaks[which.min(abs(YBreaks - dimtestobject$ENActual))] <- dimtestobject$ENActual
YBreaks <- round(YBreaks, 2)
ColorList <- rep(gray(0), length(YBreaks))
ColorList[which.min(abs(YBreaks - dimtestobject$ENActual))] <- hsv(0/12, 7/12, 7/12)
PlotFrame <- data.frame(Xs, abs(dimtestobject$ENActual - dimtestobject$ENComponents))
colnames(PlotFrame) <- c("Xs", "ENC")
ENPlot <- ggplot(data = PlotFrame, aes(x = Xs, y = ENC),
geom = "blank") + theme_bw()
ENPlot <- ENPlot + geom_hline(yintercept = 0, colour = I(hsv(0/12, 7/12, 7/12)), alpha = I(7/12))
ENPlot <- ENPlot + geom_point(aes(x = Xs, y = ENC), alpha = I(1/(max(Xs)^(1/2))))
ENPlot <- ENPlot + scale_y_continuous("|Simulated - Observed|")
ENPlot <- ENPlot + scale_x_continuous("True Orthogonal Dimensions", breaks = unique(Xs))
ENPlot <- ENPlot + opts(axis.text.y = theme_text(size = 8, hjust = 1))
ENPlot <- ENPlot + opts(axis.text.x = theme_text(size = 8, vjust = 1))
ENPlot <- ENPlot + opts(title = "True Orthogonal Dimensional Equivalent")
return(ENPlot)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment