library(ggplot2) set.seed(213141) x <- rnorm(250, 45, 5) epsilon1 <- rnorm(250, 0, 1) epsilon2 <- rnorm(250, 0, 2) epsilon3 <- rnorm(250, 0, 5) epsilon4 <- rnorm(250, 0, 10) sd_val <- c(1, 2, 5, 10) epsilons <- list(epsilon1, epsilon2, epsilon3, epsilon4) slopes <- c(-5, -2, -1, -0.25, 0, 0.25, 1, 2, 5) y_vals <- vector("list", length(epsilons) * length(slopes)) k <- 1 for (i in 1:length(epsilons)) { print(i) for (j in 1:length(slopes)) { y_vals[[k]] <- 25 + slopes[j] * x + epsilons[[i]] k <- k + 1 print(j) } } i_val <- rep(1:length(epsilons), each = length(slopes)) j_val <- rep(1:length(slopes), times = length(epsilons)) orig_cor <- c() trunc_cor <- c() est_slope <- c() est_slope_trunc <- c() orig_sd_ratio <- c() trunc_sd_ratio <- c() orig_sd_y <- c() trunc_sd_y <- c() orig_sd_x <- c() trunc_sd_x <- c() tails <- which(x < quantile(x, 0.05) | x > quantile(x, 0.95)) x_trunc <- x[-tails] for (i in 1:length(y_vals)) { orig_cor <- c(orig_cor, cor(x, y_vals[[i]])) y_trunc <- y_vals[[i]][-tails] trunc_cor <- c(trunc_cor, cor(x_trunc, y_trunc)) est_slope <- c(est_slope, unname(coef(lm(y_vals[[i]] ~ x))[2])) est_slope_trunc <- c(est_slope_trunc, unname(coef(lm(y_trunc ~ x_trunc))[2])) orig_sd_y <- c(orig_sd_y, sd(y_vals[[i]])) orig_sd_x <- c(orig_sd_x, sd(x)) trunc_sd_y <- c(trunc_sd_y, sd(y_trunc)) trunc_sd_x <- c(trunc_sd_x, sd(x_trunc)) orig_sd_ratio <- c(orig_sd_ratio, sd(y_vals[[i]])/sd(x)) trunc_sd_ratio <- c(trunc_sd_ratio, sd(y_trunc)/sd(x_trunc)) } toP <- cbind.data.frame( epsilon_sd = sd_val[i_val], true_slope = slopes[j_val], orig_cor = orig_cor, trunc_cor = trunc_cor, est_slope = est_slope, est_slope_trunc = est_slope_trunc, orig_sd_ratio = orig_sd_ratio, trunc_sd_ratio = trunc_sd_ratio, orig_sd_y = orig_sd_y, orig_sd_x = orig_sd_x, trunc_sd_y = trunc_sd_y, trunc_sd_x = trunc_sd_x ) ggplot(toP, aes(orig_cor, trunc_cor)) + geom_point() + geom_abline(intercept = 0, slope = 1) + geom_vline(xintercept = 0, lty = 2) + theme_minimal() + xlab("original correlation") + ylab("truncated correlation") ggplot(toP, aes(abs(orig_cor), abs(trunc_cor))) + geom_point() + geom_abline(intercept = 0, slope = 1) + theme_minimal() + xlab("|original correlation|") + ylab("|truncated correlation|") ggplot(toP, aes(est_slope, est_slope_trunc)) + geom_point() + geom_abline(intercept = 0, slope = 1) + theme_minimal() + xlab("original estimated slope") + ylab("truncated estimated slope") ggplot(toP, aes(orig_sd_ratio, trunc_sd_ratio)) + geom_point() + geom_abline(intercept = 0, slope = 1) + theme_minimal() + xlab("original ratio of standard deviations (y to x)") + ylab("truncated ratio of standard deviations (y to x)") ggplot(toP, aes(orig_sd_y/trunc_sd_y, orig_sd_x/trunc_sd_x)) + geom_point() + geom_abline(intercept = 0, slope = 1) + theme_minimal() + xlab("original sd of y / truncated sd of y") + ylab("original sd of x / truncated sd of x")