public

  • Download Gist
multivariate random cholesky.R
R
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
# multivariate normal example using Cholesky decomposition
 
require(Matrix)
 
#make this reproducible
set.seed(2)
 
#how many draws in our starting data set?
n <- 1e4
 
# how many draws do we want from this distribution?
drawCount <- 1e4
 
myData <- rnorm(n, 5, .6)
yourData <- myData + rnorm(n, 8, .25)
hisData <- myData + rnorm(n, 6, .4)
herData <- hisData + rnorm(n, 8, .35)
 
ourData <- data.frame(myData, yourData, hisData, herData)
 
# now we have raw correlations in the 70s, 80s, and 90s. Funny how that
# works out
cor(ourData)
 
ourData0 <- ourData
 
# shift the mean of ourData to zero
ourData0 <- as.data.frame(sweep(ourData,2,colMeans(ourData),"-"))
 
#Cholesky Decomposition of the covariance matrix
C <- chol(nearPD(cov(ourData0))$mat)
 
#create a matrix of random standard normals
Z <- matrix(rnorm(n * ncol(ourData)), ncol(ourData))
 
#multiply the standard normals by the transpose of the Cholesky
X <- t(C) %*% Z
 
#look at the covariances
cov(as.matrix(t(X)))
cov(ourData0)
#woot!
 
myDraws <- data.frame(as.matrix(t(X)))
names(myDraws) <- names(ourData)
# this method handles the covariance (correlation and standard deviation)
# but we still need to shift the means of the samples.
 
# shift the mean of the draws over to match the starting data
myDraws <- as.data.frame(sweep(myDraws,2,colMeans(ourData),"+"))
 
 
#check the mean and sd
apply(myDraws, 2, mean)
apply(myDraws, 2, sd)
 
#let's look at the mean and standard dev of the starting data
apply(ourData, 2, mean)
apply(ourData, 2, sd)
 
# so myDraws contains the final draws
# let's check Kolmogorov-Smirnov between the starting data
# and the final draws
 
for (i in 1:ncol(ourData)){
print(ks.test(myDraws[[i]], ourData[[i]]))
}
 
#look at the correlation matrices
cor(myDraws)
cor(ourData)
 
#it's fun to plot the variables and see if the PDFs line up
#It's a good sanity check. Using ggplot2 to plot
 
require(ggplot2)
 
# rearrange the data to be "tall" not "wide"
meltDraws <-melt(myDraws)
meltDraws$dataSource <- "simulated"
meltData <- melt(ourData)
meltData$dataSource <- "original"
plotData <- rbind(meltData, meltDraws)
 
qplot(value, colour=dataSource, data=plotData, geom="density")+ facet_wrap(~variable)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.