# Plotting missing values
# Visualise where missing responses fall along the predictor.
#
# The script simulates two correlated standard-normal variables, blanks out
# the 100 largest responses, and plots response against predictor. Rows with
# a missing response are drawn in red along a dotted reference line at the
# largest observed response, so their predictor values remain visible.
# The figure is written to "missing.png" in the working directory.

# Load libs ----
library(ggplot2)

# Simulate correlated data ----
# Target correlation matrix (r = .80 between the two variables).
R <- matrix(c(1, 0.80, 0.80, 1), nrow = 2)
# chol() returns the upper-triangular factor; transposing gives the lower
# factor U with U %*% t(U) == R, so U %*% Z has correlation R for iid N(0, 1) Z.
U <- t(chol(R))
n_vars <- nrow(U)
n_obs <- 10000

# Set seed for reproducibility
set.seed(31415)
random_normal <- matrix(
  rnorm(n_vars * n_obs, mean = 0, sd = 1),
  nrow = n_vars,
  ncol = n_obs
)
X <- U %*% random_normal

# Convert to df (one row per observation) and confirm the correlation
data <- as.data.frame(t(X))
names(data) <- c("response", "predictor")
cor(data)

# Induce missingness ----
# Set the 100 largest responses to NA (missingness depends on the value itself).
data$response[tail(order(data$response), 100)] <- NA

# Largest observed response: the height at which missing rows are displayed.
max_response <- max(data$response, na.rm = TRUE)

# Marker column: max_response for rows with a missing response, NA otherwise,
# so only the missing rows are drawn by the red point layer.
data$missing <- ifelse(is.na(data$response), max_response, NA)

# Plot ----
# na.rm = TRUE silences the "removed rows" warnings: the NAs are intentional.
missing_plot <- ggplot(data, aes(x = predictor, y = response)) +
  geom_point(alpha = .03, na.rm = TRUE) +
  geom_smooth(na.rm = TRUE) +
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept so the line renders the same on older installs.
  geom_hline(
    yintercept = max_response,
    size = .05,
    color = "#333333",
    linetype = "dotted",
    alpha = .7
  ) +
  geom_point(aes(y = missing), alpha = .03, color = "red", na.rm = TRUE) +
  # annotate() draws the label once; geom_text() would inherit the plot data
  # and overplot the same label for every row.
  annotate("text", label = "Missing", x = -4.8, y = max_response + .1, size = 3) +
  theme_minimal()

# Show the plot, then save it explicitly instead of relying on last_plot().
print(missing_plot)
ggsave(filename = "missing.png", plot = missing_plot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment