Skip to content

Instantly share code, notes, and snippets.

@dsparks
Created February 12, 2011 21:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dsparks/824142 to your computer and use it in GitHub Desktop.
Save dsparks/824142 to your computer and use it in GitHub Desktop.
Bootstrap simulations from data, to compare to empirical PCA, "checkplots"?
require(ggplot2)
### Boot ###
DimensionTester <- function(object, niterations = 50, dimstotest = 0){
if(length(dimstotest) == 1){dimstotest <- 1:(ncol(object)*2)}
Parameters <- expand.grid(1:niterations, dimstotest)
DoPCA <- function(dims){
Out <- rep(NA, max(dimstotest))
ToScramble <- as.matrix(object[, sample(1:ncol(object), dims, replace = T)])
ToScramble <- apply(ToScramble, 2, function(cc){sample(cc, length(cc), replace = T)})
BootPCA <- prcomp(ToScramble, center = T, scale. = T)
Out[1:dims] <- (BootPCA$sdev ^ 2) / sum(BootPCA$sdev ^ 2)
Out <- t(Out)
rownames(Out) <- dims
plot(c(Out))
return(Out)
}
BootSDs <- do.call(rbind, lapply(Parameters[, 2], DoPCA))
ENComponents <- apply(BootSDs, 1, function(x){sum(x, na.rm = T)^2 / sum(x^2, na.rm = T)})
ActualSD <- prcomp(object, center = T, scale. = T)$sdev ^ 2 / sum(prcomp(object, center = T, scale. = T)$sdev ^ 2)
Actual <- data.frame(Component = 1:length(ActualSD), Variance = ActualSD)
ENActual <- sum(ActualSD, na.rm = T)^2 / sum(ActualSD^2, na.rm = T)
MeanProximity <- c((ENActual - by(ENComponents, names(ENComponents), mean)))[unique(names(ENComponents))]
NearestIntegerDimension <- as.numeric(names(which.min(abs(MeanProximity))))
ToReturn <- vector("list")
ToReturn$BootSDs <- BootSDs
ToReturn$Actual <- Actual
ToReturn$ENComponents <- ENComponents
ToReturn$ENActual <- ENActual
ToReturn$MeanProximity <- MeanProximity
ToReturn$NearestIntegerDimension <- NearestIntegerDimension
return(ToReturn)
}
### Plot ###
DimensionTestPlotter <- function(dimtestobject){
Xs <- as.numeric(rownames(dimtestobject$BootSDs))
YBreaks <- unique(Xs)
YBreaks[which.min(abs(YBreaks - dimtestobject$ENActual))] <- dimtestobject$ENActual
YBreaks <- round(YBreaks, 2)
ColorList <- rep(gray(0), length(YBreaks))
ColorList[which.min(abs(YBreaks - dimtestobject$ENActual))] <- hsv(0/12, 7/12, 7/12)
PlotFrame <- data.frame(Xs, dimtestobject$ENComponents)
colnames(PlotFrame) <- c("Xs", "ENC")
ENPlot <- ggplot(data = PlotFrame, aes(x = Xs, y = ENC),
geom = "blank") + theme_bw()
ENPlot <- ENPlot + geom_hline(yintercept = dimtestobject$ENActual, colour = I(hsv(0/12, 7/12, 7/12)), lwd = I(1), alpha = I(7/12))
ENPlot <- ENPlot + geom_point(aes(x = Xs, y = ENC), alpha = I(1/(max(Xs)^(1/2))))
ENPlot <- ENPlot + scale_y_continuous("Effective Number of Components", breaks = YBreaks, labels = gsub(".00", "", YBreaks))
ENPlot <- ENPlot + scale_x_continuous("True Orthogonal Dimensions", breaks = unique(Xs))
ENPlot <- ENPlot + opts(axis.text.y = theme_text(size = 8, hjust = 1, colour = ColorList))
ENPlot <- ENPlot + opts(axis.text.x = theme_text(size = 8, vjust = 1))
ENPlot <- ENPlot + opts(title = "True Orthogonal Dimensional Equivalent")
return(ENPlot)
}
### Check plot ###
CheckPlot <- function(dimtestobject){
Xs <- as.numeric(rownames(dimtestobject$BootSDs))
YBreaks <- unique(Xs)
YBreaks[which.min(abs(YBreaks - dimtestobject$ENActual))] <- dimtestobject$ENActual
YBreaks <- round(YBreaks, 2)
ColorList <- rep(gray(0), length(YBreaks))
ColorList[which.min(abs(YBreaks - dimtestobject$ENActual))] <- hsv(0/12, 7/12, 7/12)
PlotFrame <- data.frame(Xs, abs(dimtestobject$ENActual - dimtestobject$ENComponents))
colnames(PlotFrame) <- c("Xs", "ENC")
ENPlot <- ggplot(data = PlotFrame, aes(x = Xs, y = ENC),
geom = "blank") + theme_bw()
ENPlot <- ENPlot + geom_hline(yintercept = 0, colour = I(hsv(0/12, 7/12, 7/12)), alpha = I(7/12))
ENPlot <- ENPlot + geom_point(aes(x = Xs, y = ENC), alpha = I(1/(max(Xs)^(1/2))))
ENPlot <- ENPlot + scale_y_continuous("|Simulated - Observed|")
ENPlot <- ENPlot + scale_x_continuous("True Orthogonal Dimensions", breaks = unique(Xs))
ENPlot <- ENPlot + opts(axis.text.y = theme_text(size = 8, hjust = 1))
ENPlot <- ENPlot + opts(axis.text.x = theme_text(size = 8, vjust = 1))
ENPlot <- ENPlot + opts(title = "True Orthogonal Dimensional Equivalent")
return(ENPlot)
}
### Test ###
NDim <- 8
Beta <- matrix(rnorm(100*NDim, 0, 1), ncol = NDim)
Beta <- data.frame(Beta, Beta + matrix(rnorm(100*NDim, 0, 1/2), ncol = NDim))
BetaTest <- DimensionTester(Beta, niterations = 50)
DimensionTestPlotter(BetaTest)
CheckPlot(BetaTest)
###
NDim <- 8
Gamma <- matrix(rnorm(100*NDim, 0, 1), ncol = NDim)
Gamma <- data.frame(Gamma, Gamma + matrix(rnorm(100*NDim, 0, 1/2), ncol = NDim))
Gamma <- (Gamma > 0) * 1
GammaTest <- DimensionTester(Gamma, niterations = 50)
DimensionTestPlotter(GammaTest)
CheckPlot(GammaTest)
###
data(baseball)
Baseball <- baseball[complete.cases(baseball) & baseball$year == 2007, 7:22]
Baseball <- Baseball# / rowSums(Baseball, na.rm = T)
Baseball[is.na(Baseball)] <- 0
BaseballTest <- DimensionTester(Baseball, dimstotest = 1:10, niterations = 100)
DimensionTestPlotter(BaseballTest)
CheckPlot(BaseballTest)
###
data(USJudgeRatings) # Lawyers' Ratings of State Judges in the US Superior Court
head(USJudgeRatings)
plot(prcomp(USJudgeRatings, center = T, scale. = T))
JudgeTest <- DimensionTester(USJudgeRatings, niterations = 50)
DimensionTestPlotter(JudgeTest)
CheckPlot(JudgeTest)
###
data(mtcars) # Motor Trend Car Road Tests
head(mtcars)
plot(prcomp(mtcars, center = T, scale. = T))
CarTest <- DimensionTester(mtcars, niterations = 50)
DimensionTestPlotter(CarTest)
CheckPlot(CarTest)
###
data(randu) # Random Numbers from Congruential Generator RANDU
head(randu)
plot(prcomp(randu, center = T, scale. = T))
RandTest <- DimensionTester(randu, niterations = 50)
DimensionTestPlotter(RandTest)
CheckPlot(RandTest)
###
data(attitude) # The Chatterjee-Price Attitude Data
head(attitude)
plot(prcomp(attitude, center = T, scale. = T))
AttitudeTest <- DimensionTester(attitude, niterations = 50)
DimensionTestPlotter(AttitudeTest)
CheckPlot(AttitudeTest)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment