Skip to content

Instantly share code, notes, and snippets.

@sastoudt
Last active February 17, 2025 16:46
Investigation of Impact of Truncation on Correlation and Estimated Slopes
library(ggplot2)
set.seed(213141)
x <- rnorm(250, 45, 5)
epsilon1 <- rnorm(250, 0, 1)
epsilon2 <- rnorm(250, 0, 2)
epsilon3 <- rnorm(250, 0, 5)
epsilon4 <- rnorm(250, 0, 10)
sd_val <- c(1, 2, 5, 10)
epsilons <- list(epsilon1, epsilon2, epsilon3, epsilon4)
slopes <- c(-5, -2, -1, -0.25, 0, 0.25, 1, 2, 5)
y_vals <- vector("list", length(epsilons) * length(slopes))
k <- 1
for (i in 1:length(epsilons)) {
print(i)
for (j in 1:length(slopes)) {
y_vals[[k]] <- 25 + slopes[j] * x + epsilons[[i]]
k <- k + 1
print(j)
}
}
i_val <- rep(1:length(epsilons), each = length(slopes))
j_val <- rep(1:length(slopes), times = length(epsilons))
orig_cor <- c()
trunc_cor <- c()
est_slope <- c()
est_slope_trunc <- c()
orig_sd_ratio <- c()
trunc_sd_ratio <- c()
orig_sd_y <- c()
trunc_sd_y <- c()
orig_sd_x <- c()
trunc_sd_x <- c()
tails <- which(x < quantile(x, 0.05) | x > quantile(x, 0.95))
x_trunc <- x[-tails]
for (i in 1:length(y_vals)) {
orig_cor <- c(orig_cor, cor(x, y_vals[[i]]))
y_trunc <- y_vals[[i]][-tails]
trunc_cor <- c(trunc_cor, cor(x_trunc, y_trunc))
est_slope <- c(est_slope, unname(coef(lm(y_vals[[i]] ~ x))[2]))
est_slope_trunc <- c(est_slope_trunc, unname(coef(lm(y_trunc ~ x_trunc))[2]))
orig_sd_y <- c(orig_sd_y, sd(y_vals[[i]]))
orig_sd_x <- c(orig_sd_x, sd(x))
trunc_sd_y <- c(trunc_sd_y, sd(y_trunc))
trunc_sd_x <- c(trunc_sd_x, sd(x_trunc))
orig_sd_ratio <- c(orig_sd_ratio, sd(y_vals[[i]])/sd(x))
trunc_sd_ratio <- c(trunc_sd_ratio, sd(y_trunc)/sd(x_trunc))
}
toP <- cbind.data.frame(
epsilon_sd = sd_val[i_val],
true_slope = slopes[j_val],
orig_cor = orig_cor,
trunc_cor = trunc_cor,
est_slope = est_slope,
est_slope_trunc = est_slope_trunc,
orig_sd_ratio = orig_sd_ratio,
trunc_sd_ratio = trunc_sd_ratio,
orig_sd_y = orig_sd_y,
orig_sd_x = orig_sd_x,
trunc_sd_y = trunc_sd_y,
trunc_sd_x = trunc_sd_x
)
ggplot(toP, aes(orig_cor, trunc_cor)) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
geom_vline(xintercept = 0, lty = 2) +
theme_minimal() +
xlab("original correlation") +
ylab("truncated correlation")
ggplot(toP, aes(abs(orig_cor), abs(trunc_cor))) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
theme_minimal() +
xlab("|original correlation|") +
ylab("|truncated correlation|")
ggplot(toP, aes(est_slope, est_slope_trunc)) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
theme_minimal() +
xlab("original estimated slope") +
ylab("truncated estimated slope")
ggplot(toP, aes(orig_sd_ratio, trunc_sd_ratio)) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
theme_minimal() +
xlab("original ratio of standard deviations (y to x)") +
ylab("truncated ratio of standard deviations (y to x)")
ggplot(toP, aes(orig_sd_y/trunc_sd_y, orig_sd_x/trunc_sd_x)) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
theme_minimal() +
xlab("original sd of y / truncated sd of y") +
ylab("original sd of x / truncated sd of x")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment