Skip to content

Instantly share code, notes, and snippets.

@mikekaminsky
Last active May 6, 2020 01:37
Show Gist options
  • Save mikekaminsky/e611cfa94edfb7a909e42866b67c8daa to your computer and use it in GitHub Desktop.
Save mikekaminsky/e611cfa94edfb7a909e42866b67c8daa to your computer and use it in GitHub Desktop.
library(dplyr)
library(progress)
library(foreach)
library(doParallel)
library(tidyr)
library(magrittr)
##########################
# Blog Post Example
##########################
p_x <- 0.2
R <- -0.1
total_pop <- 1000000
P_J_XJ1 <- p_x + p_x * (1 - p_x) * R
P_J_XJ0 <- p_x - p_x * (1 - p_x) * R
delta <- (P_J_XJ1 - P_J_XJ0)
total_pop <- 1000000
true_choice <- sample(c("Hawaiian", "Other"),
size = total_pop,
prob = c(p_x, 1 - p_x),
replace = TRUE
)
true_prop <- true_choice %>%
table() %>%
as.list() %>%
lapply(., function(x) x / length(true_choice))
n_sims <- 200
R <- -0.1
sample_sizes <- c(50, 100, 1000, 10000, 100000, 500000)
jj <- 1
samp_strats <- c("srs", "biased")
cores <- detectCores()
cl <- makeCluster(pmin(cores[1] - 1, length(samp_strats)), outfile = "")
registerDoParallel(cl)
res_strats <- foreach(style = iter(samp_strats), .combine = plyr::rbind.fill) %dopar% {
pb <- progress::progress_bar$new(
format = "[:bar] :current/:total (:percent)",
total = n_sims * length(sample_sizes)
)
pb$tick(0)
library(magrittr)
library(dplyr)
res_l <- vector(mode = "list", length = n_sims * length(sample_sizes))
for (sample_size in sample_sizes) {
resp_p <- sample_size / total_pop
res_p_t <- resp_p - (1 - p_x) * resp_p * R
res_p_c <- resp_p + (1-(1 - p_x)) * resp_p * R
for (i in seq(n_sims)) {
if (style == "srs") {
sample <- sample(true_choice, size = sample_size, replace = FALSE)
} else {
c_t <- rep(0, total_pop)
# Who responds among people who like Hawaiian?
c_t[true_choice == "Hawaiian"] <- sample(c(1, 0),
length(c_t[true_choice == "Hawaiian"]),
prob = c(res_p_t, 1 - res_p_t),
replace = TRUE
)
# Who responds among people who like other?
c_t[true_choice == "Other"] <- sample(c(1, 0),
length(c_t[true_choice == "Other"]),
prob = c(res_p_c, 1 - res_p_c),
replace = TRUE
)
sample <- true_choice[c_t == 1]
}
df <- sample %>%
table() %>%
as.list() %>%
lapply(., function(x) x / length(sample)) %>%
data.frame() %>%
dplyr::mutate(
n = sample_size,
type = style
)
res_l[[jj]] <- df
jj <- jj + 1
pb$tick()
}
}
plyr::rbind.fill(res_l) %>%
mutate(
error = Hawaiian - true_prop$Hawaiian,
error_pct = error / true_prop$Hawaiian
)
}
# Compile statistics table
res_strats %>%
group_by(n, type) %>%
summarize(
rmse = sqrt(mean(error**2)),
mape = mean(abs(error_pct))
) %>%
pivot_wider(id_cols = "n", names_from = "type", values_from = c("rmse", "mape"))
stopCluster(cl)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment