Last active
November 27, 2018 12:06
-
-
Save ajstewartlang/f009ed2deb6e86e5416f5b5906bbafa1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note this uses the new version of gganimate by Thomas Lin Pedersen - @thomasp85 | |
# it is not yet on CRAN so need to use devtools to install it | |
# devtools::install_github('thomasp85/gganimate') | |
library(gganimate) # New version of gganimate | |
library(tidyverse) # All praise the tidyverse | |
library(MASS) # Needed to sample from multivariate distribution | |
# Simulating multivariate data with specific covariance structure | |
# Use the mvrnorm() function from the MASS package | |
# A vector of means of our two variables | |
mu <- c(1000, 2000) | |
# Covariance of our 2 variables | |
# It is equal to Pearson's R * SD_var1 * SD_var2 | |
# If we know the variance for each of our variables we can calculate the sd | |
# We can then use these values to work out the covariance we need for any | |
# particular Pearson's r value | |
# For the below example to give us a Pearson's r of .5 | |
# we have covariance = .5 * sqrt(100) * sqrt(50) which gives us 35.35534 | |
myr <- 35.35534 | |
# The covariance matrix where we have the variance of variable 1, | |
# the covariance of variables 1 and 2 | |
# the covariance of variables 1 and 2 and the variance of variable 2 | |
mysigma <- matrix(c(100, myr, myr, 50), 2, 2) | |
# Animating sampling many times and illustrating varation in our regression line (and our correlation | |
# calculation) due to sampling error. | |
data <- NULL | |
sample_size <- 50 | |
set.seed(1234) | |
for (i in 2:10) { | |
sample <- data.frame(mvrnorm(sample_size, mu, mysigma)) | |
sample$sample <- i | |
data <- rbind(sample, data) | |
} | |
# Now set the first sample so the correlation is identical to the correlation in the multivariate | |
# population we're sampling from | |
# Need to reset empirical to TRUE for this to work | |
sample <- data.frame(mvrnorm(sample_size, mu, mysigma, empirical = TRUE)) | |
sample$sample <- 1 | |
data <- rbind(sample, data) | |
colnames(data) <- c("Var_1", "Var_2", "Sample") | |
ggplot(data, aes(x = Var_1, y = Var_2)) + | |
geom_point() + | |
geom_smooth(method = "lm", se = FALSE) + | |
transition_time(Sample) + | |
labs(x = "Variable 1", y = "Variable 2", | |
title = "10 samples where each sample size = 50\nSample number: {as.integer(frame_time)}") + | |
theme(title = element_text(size = 15)) + | |
ease_aes('linear') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment