markhwhiteii/puzzling_mlmpreds.R Secret

## puzzling_mlmpreds.R
library(parallel)
cl <- makeCluster(detectCores())
results <- parLapply(cl, 1:5000, function(placeholder) {
  library(tidyverse)
  library(lme4)
  inv_logit <- function(x) exp(x) / (1 + exp(x))
  j <- 400
  id <- 1:j
  g00 <- -1.5
  g01 <- .2
  u0sd <- 1
  ij <- 3
  x <- rbinom(j, 1, .5)
  u0 <- rnorm(j, 0, u0sd)
  yjlogit <- g00 + g01 * x + u0
  dat <- data.frame(id, g00, g01, x, u0, yjlogit)
  dat <- dat[rep(seq_len(nrow(dat)), each = ij), ]
  dat$y <- rbinom(nrow(dat), 1, inv_logit(dat$yjlogit))
  mod <- glmer(y ~ x + (1 | id), dat, binomial)

  # replicate distribution of 1, 2, and 3 obs like in data
  dat2 <- dat[dat$id < 41, ]
  dat2 <- dat[dat$id > 40 & dat$id < 161, ] %>%
    group_by(id) %>%
    slice(1:2) %>%
    bind_rows(dat2, .)
  dat2 <- dat[dat$id > 160, ] %>%
    group_by(id) %>%
    slice(1) %>%
    bind_rows(dat2, .)
  mod2 <- glmer(y ~ x + (1 | id), dat2, binomial)

  # is just 1 obs problem? just 2 and 3
  dat3 <- dat[dat$id < 41, ]
  dat3 <- dat[dat$id > 40, ] %>%
    group_by(id) %>%
    slice(1:2) %>%
    bind_rows(dat3, .)
  mod3 <- glmer(y ~ x + (1 | id), dat3, binomial)

  # compare performance
  out <- c(
    # mod
    u0_3obs = diff(c(u0sd, as.data.frame(VarCorr(mod))$sdcor)),
    g00_3obs = diff(c(g00, fixef(mod)[[1]])),
    g01_3obs = diff(c(g01, fixef(mod)[[2]])),
    pred0_3obs = inv_logit(fixef(mod)[[1]]),
    pred1_3obs = inv_logit(sum(fixef(mod))),
    # mod2
    u0_123obs = diff(c(u0sd, as.data.frame(VarCorr(mod2))$sdcor)),
    g00_123obs = diff(c(g00, fixef(mod2)[[1]])),
    g01_123obs = diff(c(g01, fixef(mod2)[[2]])),
    pred0_123obs = inv_logit(fixef(mod2)[[1]]),
    pred1_123obs = inv_logit(sum(fixef(mod2))),
    # mod3
    u0_23obs = diff(c(u0sd, as.data.frame(VarCorr(mod3))$sdcor)),
    g00_23obs = diff(c(g00, fixef(mod3)[[1]])),
    g01_23obs = diff(c(g01, fixef(mod3)[[2]])),
    pred0_23obs = inv_logit(fixef(mod3)[[1]]),
    pred1_23obs = inv_logit(sum(fixef(mod3)))
  )
})
stopCluster(cl)
results <- as.data.frame(do.call(rbind, results))
results %>%
  mutate(iter = 1:nrow(.)) %>%
  gather(... = -iter) %>%
  separate("key", c("param", "mod")) %>%
  spread(param, value) %>%
  group_by(mod) %>%
  summarise_at(vars(g00:u0), mean)

results %>%
  mutate(iter = 1:nrow(.)) %>%
  gather(... = -iter) %>%
  separate("key", c("param", "mod")) %>%
  filter(param %in% c("pred0", "pred1")) %>%
  ggplot(aes(x = value, fill = param)) +
  geom_density(alpha = .7) +
  facet_wrap(~ mod)
	library(parallel)
	cl <- makeCluster(detectCores())
	results <- parLapply(cl, 1:5000, function(placeholder) {
	library(tidyverse)
	library(lme4)
	inv_logit <- function(x) exp(x) / (1 + exp(x))
	j <- 400
	id <- 1:j
	g00 <- -1.5
	g01 <- .2
	u0sd <- 1
	ij <- 3
	x <- rbinom(j, 1, .5)
	u0 <- rnorm(j, 0, u0sd)
	yjlogit <- g00 + g01 * x + u0
	dat <- data.frame(id, g00, g01, x, u0, yjlogit)
	dat <- dat[rep(seq_len(nrow(dat)), each = ij), ]
	dat$y <- rbinom(nrow(dat), 1, inv_logit(dat$yjlogit))
	mod <- glmer(y ~ x + (1 \| id), dat, binomial)

	# replicate distribution of 1, 2, and 3 obs like in data
	dat2 <- dat[dat$id < 41, ]
	dat2 <- dat[dat$id > 40 & dat$id < 161, ] %>%
	group_by(id) %>%
	slice(1:2) %>%
	bind_rows(dat2, .)
	dat2 <- dat[dat$id > 160, ] %>%
	group_by(id) %>%
	slice(1) %>%
	bind_rows(dat2, .)
	mod2 <- glmer(y ~ x + (1 \| id), dat2, binomial)

	# is just 1 obs problem? just 2 and 3
	dat3 <- dat[dat$id < 41, ]
	dat3 <- dat[dat$id > 40, ] %>%
	group_by(id) %>%
	slice(1:2) %>%
	bind_rows(dat3, .)
	mod3 <- glmer(y ~ x + (1 \| id), dat3, binomial)

	# compare performance
	out <- c(
	# mod
	u0_3obs = diff(c(u0sd, as.data.frame(VarCorr(mod))$sdcor)),
	g00_3obs = diff(c(g00, fixef(mod)[[1]])),
	g01_3obs = diff(c(g01, fixef(mod)[[2]])),
	pred0_3obs = inv_logit(fixef(mod)[[1]]),
	pred1_3obs = inv_logit(sum(fixef(mod))),
	# mod2
	u0_123obs = diff(c(u0sd, as.data.frame(VarCorr(mod2))$sdcor)),
	g00_123obs = diff(c(g00, fixef(mod2)[[1]])),
	g01_123obs = diff(c(g01, fixef(mod2)[[2]])),
	pred0_123obs = inv_logit(fixef(mod2)[[1]]),
	pred1_123obs = inv_logit(sum(fixef(mod2))),
	# mod3
	u0_23obs = diff(c(u0sd, as.data.frame(VarCorr(mod3))$sdcor)),
	g00_23obs = diff(c(g00, fixef(mod3)[[1]])),
	g01_23obs = diff(c(g01, fixef(mod3)[[2]])),
	pred0_23obs = inv_logit(fixef(mod3)[[1]]),
	pred1_23obs = inv_logit(sum(fixef(mod3)))
	)
	})
	stopCluster(cl)
	results <- as.data.frame(do.call(rbind, results))
	results %>%
	mutate(iter = 1:nrow(.)) %>%
	gather(... = -iter) %>%
	separate("key", c("param", "mod")) %>%
	spread(param, value) %>%
	group_by(mod) %>%
	summarise_at(vars(g00:u0), mean)

	results %>%
	mutate(iter = 1:nrow(.)) %>%
	gather(... = -iter) %>%
	separate("key", c("param", "mod")) %>%
	filter(param %in% c("pred0", "pred1")) %>%
	ggplot(aes(x = value, fill = param)) +
	geom_density(alpha = .7) +
	facet_wrap(~ mod)