library(ggplot2)

set.seed(213141)

x <- rnorm(250, 45, 5)


epsilon1 <- rnorm(250, 0, 1)
epsilon2 <- rnorm(250, 0, 2)
epsilon3 <- rnorm(250, 0, 5)
epsilon4 <- rnorm(250, 0, 10)

sd_val <- c(1, 2, 5, 10)

epsilons <- list(epsilon1, epsilon2, epsilon3, epsilon4)

slopes <- c(-5, -2, -1, -0.25, 0, 0.25, 1, 2, 5)


y_vals <- vector("list", length(epsilons) * length(slopes))

k <- 1
for (i in 1:length(epsilons)) {
  print(i)
  for (j in 1:length(slopes)) {
    y_vals[[k]] <- 25 + slopes[j] * x + epsilons[[i]]
    k <- k + 1
    print(j)
  }
}


i_val <- rep(1:length(epsilons), each = length(slopes))
j_val <- rep(1:length(slopes), times = length(epsilons))


orig_cor <- c()
trunc_cor <- c()
est_slope <- c()
est_slope_trunc <- c()
orig_sd_ratio <- c()
trunc_sd_ratio <- c()
orig_sd_y <- c()
trunc_sd_y <- c()
orig_sd_x <- c()
trunc_sd_x <- c()
tails <- which(x < quantile(x, 0.05) | x > quantile(x, 0.95))
x_trunc <- x[-tails]
for (i in 1:length(y_vals)) {
  orig_cor <- c(orig_cor, cor(x, y_vals[[i]]))
  y_trunc <- y_vals[[i]][-tails]
  trunc_cor <- c(trunc_cor, cor(x_trunc, y_trunc))

  est_slope <- c(est_slope, unname(coef(lm(y_vals[[i]] ~ x))[2]))
  est_slope_trunc <- c(est_slope_trunc, unname(coef(lm(y_trunc ~ x_trunc))[2]))
  
  orig_sd_y <- c(orig_sd_y, sd(y_vals[[i]]))
  orig_sd_x <- c(orig_sd_x, sd(x))
  trunc_sd_y <- c(trunc_sd_y, sd(y_trunc))
  trunc_sd_x <- c(trunc_sd_x, sd(x_trunc))
  orig_sd_ratio <- c(orig_sd_ratio, sd(y_vals[[i]])/sd(x))
  trunc_sd_ratio <- c(trunc_sd_ratio, sd(y_trunc)/sd(x_trunc))
}

toP <- cbind.data.frame(
  epsilon_sd = sd_val[i_val],
  true_slope = slopes[j_val],
  orig_cor = orig_cor,
  trunc_cor = trunc_cor,
  est_slope = est_slope,
  est_slope_trunc = est_slope_trunc,
  orig_sd_ratio = orig_sd_ratio,
  trunc_sd_ratio = trunc_sd_ratio,
  orig_sd_y = orig_sd_y,
  orig_sd_x = orig_sd_x,
  trunc_sd_y = trunc_sd_y,
  trunc_sd_x = trunc_sd_x
)


ggplot(toP, aes(orig_cor, trunc_cor)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  geom_vline(xintercept = 0, lty = 2) +
  theme_minimal() + 
  xlab("original correlation") +
  ylab("truncated correlation")


ggplot(toP, aes(abs(orig_cor), abs(trunc_cor))) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  theme_minimal() + 
  xlab("|original correlation|") +
  ylab("|truncated correlation|")


ggplot(toP, aes(est_slope, est_slope_trunc)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  theme_minimal() +
  xlab("original estimated slope") +
  ylab("truncated estimated slope")

ggplot(toP, aes(orig_sd_ratio, trunc_sd_ratio)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  theme_minimal() +
  xlab("original ratio of standard deviations (y to x)") +
  ylab("truncated ratio of standard deviations (y to x)")

ggplot(toP, aes(orig_sd_y/trunc_sd_y, orig_sd_x/trunc_sd_x)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  theme_minimal() +
  xlab("original sd of y / truncated sd of y") +
  ylab("original sd of x / truncated sd of x")