Skip to content

Instantly share code, notes, and snippets.

@CerebralMastication
Created September 2, 2010 17:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save CerebralMastication/562567 to your computer and use it in GitHub Desktop.
Save CerebralMastication/562567 to your computer and use it in GitHub Desktop.
# multivariate normal example using Cholesky decomposition
require(Matrix)
#make this reproducible
set.seed(2)
#how many draws in our starting data set?
n <- 1e4
# how many draws do we want from this distribution?
drawCount <- 1e4
myData <- rnorm(n, 5, .6)
yourData <- myData + rnorm(n, 8, .25)
hisData <- myData + rnorm(n, 6, .4)
herData <- hisData + rnorm(n, 8, .35)
ourData <- data.frame(myData, yourData, hisData, herData)
# now we have raw correlations in the 70s, 80s, and 90s. Funny how that
# works out
cor(ourData)
ourData0 <- ourData
# shift the mean of ourData to zero
ourData0 <- as.data.frame(sweep(ourData,2,colMeans(ourData),"-"))
#Cholesky Decomposition of the covariance matrix
C <- chol(nearPD(cov(ourData0))$mat)
#create a matrix of random standard normals
Z <- matrix(rnorm(n * ncol(ourData)), ncol(ourData))
#multiply the standard normals by the transpose of the Cholesky
X <- t(C) %*% Z
#look at the covariances
cov(as.matrix(t(X)))
cov(ourData0)
#woot!
myDraws <- data.frame(as.matrix(t(X)))
names(myDraws) <- names(ourData)
# this method handles the covariance (correlation and standard deviation)
# but we still need to shift the means of the samples.
# shift the mean of the draws over to match the starting data
myDraws <- as.data.frame(sweep(myDraws,2,colMeans(ourData),"+"))
#check the mean and sd
apply(myDraws, 2, mean)
apply(myDraws, 2, sd)
#let's look at the mean and standard dev of the starting data
apply(ourData, 2, mean)
apply(ourData, 2, sd)
# so myDraws contains the final draws
# let's check Kolmogorov-Smirnov between the starting data
# and the final draws
for (i in 1:ncol(ourData)){
print(ks.test(myDraws[[i]], ourData[[i]]))
}
#look at the correlation matrices
cor(myDraws)
cor(ourData)
#it's fun to plot the variables and see if the PDFs line up
#It's a good sanity check. Using ggplot2 to plot
require(ggplot2)
# rearrange the data to be "tall" not "wide"
meltDraws <-melt(myDraws)
meltDraws$dataSource <- "simulated"
meltData <- melt(ourData)
meltData$dataSource <- "original"
plotData <- rbind(meltData, meltDraws)
qplot(value, colour=dataSource, data=plotData, geom="density")+ facet_wrap(~variable)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment