Created
January 12, 2022 23:08
-
-
Save scrogster/454259daf5abae37058449ce26c4c35c to your computer and use it in GitHub Desktop.
imputing missing values in JAGS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#simple missing data imputation | |
#1. simulate some fake data from a simple linear regression | |
beta0<-5 | |
beta1<--2 | |
sigma<- 2 | |
n=100 | |
x1<-runif(n, 0, 10) | |
y_bar<- beta0 +beta1*x1 | |
y_obs<-rnorm(n, y_bar, sigma) | |
#plot the fake data | |
plot(y_obs~x1) | |
#make first 20 x1 values missing, but keep them to check how well the imputation works in JAGS | |
hidden_values<-x1[1:20] | |
x1[1:20]<-NA #now set them to NA | |
#2. set up data object and model for JAGS | |
jags_dat<-list(y_obs=y_obs, | |
x1=x1, | |
n=n) | |
library(jagsUI) | |
modfile <- tempfile() | |
#Write model to file - simple linear regression | |
writeLines(" | |
model{ | |
for (i in 1:n){ | |
y_obs[i] ~ dnorm(mu[i], tau) | |
mu[i] <- beta0 + beta1*x1[i] | |
} | |
#Priors | |
beta0 ~ dnorm(0, 0.00001) | |
beta1 ~ dnorm(0, 0.00001) | |
sigma ~ dunif(0,1000) | |
tau <- pow(sigma,-2) | |
#priors on the missing values | |
for(k in 1:20) {x1[k]~dunif(0, 10)} | |
} | |
", con=modfile) | |
params=c("beta1", "beta0", "sigma", "x1[1:20]") | |
out <- jags(data = jags_dat, | |
parameters.to.save = params, | |
model.file = modfile, | |
n.chains = 3, | |
n.iter = 5000, | |
n.burnin = 1000) | |
#3. summarise the posteriors for the missing values | |
imputed_means<-colMeans(out$sims.list$x1) | |
imputed_lowerCI<-apply(out$sims.list$x1, 2, quantile, 0.025) | |
imputed_upperCI<-apply(out$sims.list$x1, 2, quantile, 0.975) | |
#4. Plot the imputations vs truth | |
plot(imputed_means~hidden_values, ylim=c(-5, 12), xlim=c(0, 10), | |
ylab="Imputed values", xlab="True values", pch=16, col="red") | |
abline(0, 1) | |
segments(x0=hidden_values, x1=hidden_values, y0=imputed_lowerCI, y1=imputed_upperCI, col="red") | |
title(main="Imputed values vs true values") | |
#5. Overlay the imputed values on the rest of the data | |
plot(y_obs~x1, main="Posteriors of imputed x1 values") #observations | |
points(x=imputed_means, y=y_obs[1:20], col="red", pch=16) | |
abline(a=beta0, b=beta1, lty=2, col="grey", lwd=2) | |
segments(x0=imputed_lowerCI, x1=imputed_upperCI, y0=y_obs[1:20], y1=y_obs[1:20], col="red") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment