Skip to content
Create a gist now

Instantly share code, notes, and snippets.

Embed URL


Subversion checkout URL

You can clone with
Download ZIP
# multivariate normal example using Cholesky decomposition
#make this reproducible
#how many draws in our starting data set?
n <- 1e4
# how many draws do we want from this distribution?
drawCount <- 1e4
myData <- rnorm(n, 5, .6)
yourData <- myData + rnorm(n, 8, .25)
hisData <- myData + rnorm(n, 6, .4)
herData <- hisData + rnorm(n, 8, .35)
ourData <- data.frame(myData, yourData, hisData, herData)
# now we have raw correlations in the 70s, 80s, and 90s. Funny how that
# works out
ourData0 <- ourData
# shift the mean of ourData to zero
ourData0 <-,2,colMeans(ourData),"-"))
#Cholesky Decomposition of the covariance matrix
C <- chol(nearPD(cov(ourData0))$mat)
#create a matrix of random standard normals
Z <- matrix(rnorm(n * ncol(ourData)), ncol(ourData))
#multiply the standard normals by the transpose of the Cholesky
X <- t(C) %*% Z
#look at the covariances
myDraws <- data.frame(as.matrix(t(X)))
names(myDraws) <- names(ourData)
# this method handles the covariance (correlation and standard deviation)
# but we still need to shift the means of the samples.
# shift the mean of the draws over to match the starting data
myDraws <-,2,colMeans(ourData),"+"))
#check the mean and sd
apply(myDraws, 2, mean)
apply(myDraws, 2, sd)
#let's look at the mean and standard dev of the starting data
apply(ourData, 2, mean)
apply(ourData, 2, sd)
# so myDraws contains the final draws
# let's check Kolmogorov-Smirnov between the starting data
# and the final draws
for (i in 1:ncol(ourData)){
print(ks.test(myDraws[[i]], ourData[[i]]))
#look at the correlation matrices
#it's fun to plot the variables and see if the PDFs line up
#It's a good sanity check. Using ggplot2 to plot
# rearrange the data to be "tall" not "wide"
meltDraws <-melt(myDraws)
meltDraws$dataSource <- "simulated"
meltData <- melt(ourData)
meltData$dataSource <- "original"
plotData <- rbind(meltData, meltDraws)
qplot(value, colour=dataSource, data=plotData, geom="density")+ facet_wrap(~variable)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.