Last active
August 29, 2015 14:11
-
-
Save psychemedia/6144aa51bbe078d30a31 to your computer and use it in GitHub Desktop.
Scatterplots for differently shaped random distributions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Source: Reich Seiter, http://rpubs.com/rseiter/50148 | |
#NORmal-To-Anything (NORTA) Algorithm Examples | |
#Context: http://blog.ouseful.info/2014/12/17/sketching-scatterplots-to-demonstrate-different-correlations/ | |
# Create a multi-variate <dist> distribution with <samples> samples and correlation <r> | |
# Distribution names as in help("Distributions") | |
# The distribution names are used to form the quantile function name q<dist> | |
# Currently additional arguments to the quantile function are not supported | |
corrdata <- function(samples=200, r=0, dist="norm"){ | |
data = mvrnorm(n=samples, mu=c(0, 0), Sigma=matrix(c(1, r, r, 1), nrow=2), empirical=TRUE) | |
distfunname <- paste0("q", dist) | |
if (!exists(distfunname)) { error(paste("Function", distfunname, "does not exist")) } | |
# Convert the multi-variate normal variables to the desired distribution using NORTA | |
data <- eval(parse(text=paste0(distfunname, "(pnorm(data))"))) | |
# Check that the correlation has not changed | |
writeLines(paste("Desired correlation for dist", dist, "was", r, | |
" Actual correlation was", round(cor(data)[1,2],3))) | |
data.frame(x=data[,1], y=data[,2]) | |
} | |
plotcorrdata <- function(samples=200, dist="norm") { | |
df <- data.frame() | |
for (i in c(1,0.8,0.5,0.2,0,-0.2,-0.5,-0.8,-1)) { | |
tmp <- corrdata(samples, i, dist) | |
tmp['corr'] <- i | |
df <- rbind(df, tmp) | |
} | |
g <- ggplot(df, aes(x=x,y=y)) + geom_point(size=1) | |
g <- g + facet_wrap(~corr) + stat_smooth(method='lm',se=FALSE,color='red') | |
g <- g + coord_fixed() + ggtitle(paste("Scatterplots for Correlated", dist, "Random Variables")) | |
plot(g) | |
} | |
dists <- c("norm", "unif", "exp") | |
for (dist in dists) { | |
plotcorrdata(samples=200, dist=dist) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment