Skip to content

Instantly share code, notes, and snippets.

@markhwhiteii
Created February 7, 2019 23:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markhwhiteii/9461f28798419dbe50d0bfac84cbb355 to your computer and use it in GitHub Desktop.
Save markhwhiteii/9461f28798419dbe50d0bfac84cbb355 to your computer and use it in GitHub Desktop.
# define functions -------------------------------------------------------------
inv_logit <- function(x) exp(x) / (exp(x) + 1)
sim_pop <- function(N = 5000, b_min = 0, b_max = 1.5) {
X <- cbind(1, rbinom(N, 1, .5), rbinom(N, 1, .5))
B <- c(0, runif(1, b_min, b_max), runif(1, b_min, b_max))
y <- rbinom(N, 1, inv_logit(X %*% B))
return(as.data.frame(cbind(X[, -1], y)))
}
get_sample <- function(pop, p = c(.02, .02, .02, .02)) {
p <- ifelse(rowSums(pop[, 1:2]) == 2, p[1],
ifelse(rowSums(pop[, 1:2]) == 0, p[2],
ifelse(pop[, 1] == 1, p[3], p[4])))
sampled <- as.logical(rbinom(nrow(pop), 1, p))
return(pop[sampled, ])
}
get_weights <- function(dat, pop) {
tmp1 <- as.data.frame(prop.table(table(pop$V1)))
tmp1$Freq <- tmp1$Freq * nrow(dat)
names(tmp1)[1] <- "V1"
tmp2 <- as.data.frame(prop.table(table(pop$V2)))
tmp2$Freq <- tmp2$Freq * nrow(dat)
names(tmp2)[1] <- "V2"
result <- invisible(suppressWarnings(survey::svydesign(~ 1, data = dat)))
result <- survey::rake(result, list(~V1, ~V2), list(tmp1, tmp2))
return(weights(result))
}
run_iter <- function(pop, non_rand_p = c(.065, .005, .005, .005)) {
pop_summary <- summary(glm(y ~ 1, binomial, pop))$coef[, 1:2]
rnd_summary <- summary(glm(y ~ 1, binomial, get_sample(pop)))$coef[, 1:2]
tmp <- get_sample(pop, non_rand_p)
nrd_summary <- summary(glm(y ~ 1, binomial, tmp))$coef[, 1:2]
wts <- get_weights(tmp, pop)
wtd_summary <- summary(glm(y ~ 1, binomial, tmp, wts))$coef[, 1:2]
return(list(
population = pop_summary,
rand_sample = rnd_summary,
nonrand_sample = nrd_summary,
weighted_sample = wtd_summary
))
}
# get results ------------------------------------------------------------------
library(tidyverse)
set.seed(1839)
iter <- 5000
results <- lapply(seq_len(iter), function(i) {
do.call(rbind, run_iter(sim_pop())) %>%
as.data.frame() %>%
rownames_to_column("type") %>%
gather("stat", "est", -type) %>%
mutate(iter = i)
})
results <- do.call(bind_rows, results)
# estimates --------------------------------------------------------------------
(dist_ests <- results %>%
filter(stat == "Estimate") %>%
group_by(type) %>%
summarise(mean = mean(est)))
results %>%
filter(stat == "Estimate") %>%
ggplot(aes(x = est)) +
geom_density(alpha = .5) +
facet_wrap(~ type) +
geom_vline(aes(xintercept = dist_ests$mean[dist_ests$type == "population"]),
linetype = "dotted")
# standard errors --------------------------------------------------------------
results %>%
filter(stat == "Std. Error" & type != "population") %>%
group_by(type) %>%
summarise(avg_se = mean(est))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment