Create a gist now

Instantly share code, notes, and snippets.

@soodoku /missing.R
Last active Apr 22, 2016

What would you like to do?
plotting missing
# Load libs
library(ggplot2)
# Simulate two standard-normal variables with a correlation of 0.80 ----
# Target correlation matrix
R <- matrix(c(1, .80, .80, 1), nrow = 2)
# Lower-triangular Cholesky factor of R, so that U %*% t(U) equals R
U <- t(chol(R))
n_vars <- nrow(U)
n_obs <- 10000
# Set seed for reproducibility
set.seed(31415)
# Independent N(0, 1) draws: one row per variable, one column per observation
random_normal <- matrix(
  rnorm(n_vars * n_obs, 0, 1),
  nrow = n_vars,
  ncol = n_obs
)
# Mixing the independent draws through U induces the correlation structure R
X <- U %*% random_normal
# Convert to a data frame (one row per observation) and confirm the correlation
data <- as.data.frame(t(X))
names(data) <- c("response", "predictor")
# Explicit print() so the check is also shown when the script is source()d
print(cor(data))
# Induce missingness in the response ----
# Blank out the n_missing largest response values, so whether a value is
# missing depends on the response itself.
n_missing <- 100
data$response[tail(order(data$response), n_missing)] <- NA
# Marker column used by the plot: rows whose response is missing get the
# largest remaining (observed) response value, so they can be drawn at the top
# of the observed range; every other row gets NA and is therefore not drawn.
data$missing <- ifelse(
  is.na(data$response),
  max(data$response, na.rm = TRUE),
  NA
)
# Plot response against predictor and mark the rows with a missing response ----
# Largest observed response: height of the reference line and of the red
# markers for rows whose response is missing.
response_max <- max(data$response, na.rm = TRUE)

missing_plot <- ggplot(data, aes(x = predictor, y = response)) +
  geom_point(alpha = .03) +
  geom_smooth() +
  # Constant yintercept (not an aes() mapping) so one line is drawn rather
  # than one overplotted line per data row. `size` is the line width here
  # (newer ggplot2 releases name this argument `linewidth`).
  geom_hline(
    yintercept = response_max,
    size = .05,
    color = "#333333",
    linetype = "dotted",
    alpha = .7
  ) +
  # Rows with a missing response, drawn at response_max; x is inherited from
  # the plot-level mapping, and the column is referenced by name, not `data$`.
  geom_point(aes(y = missing), alpha = .03, color = "red") +
  # annotate() draws the label once; geom_text() would inherit the plot data
  # and repeat the label for every row.
  annotate(
    "text",
    label = "Missing",
    y = response_max + .1,
    x = -4.8,
    size = 3
  ) +
  theme_minimal()

# Display the plot, then save that exact object rather than relying on
# ggplot2's implicit "last plot".
print(missing_plot)
ggsave(filename = "missing.png", plot = missing_plot)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment