Skip to content

Instantly share code, notes, and snippets.

@soodoku
Last active January 15, 2025 02:49
Show Gist options
  • Save soodoku/f59fdfaf2deaa4acb8a7c8153984228b to your computer and use it in GitHub Desktop.
Save soodoku/f59fdfaf2deaa4acb8a7c8153984228b to your computer and use it in GitHub Desktop.
Alternate Herding
-------------
set.seed(123)
# Step 1: Simulate Population
num_strata <- 10 # Number of strata
population_size <- 100000 # Total population size
# Generate strata sizes and proportions
strata_sizes <- sample(1000:15000, num_strata, replace = TRUE)
strata_sizes <- round(strata_sizes / sum(strata_sizes) * population_size)
# Make strata means vary widely across strata
strata_means <- runif(num_strata, 0.1, 0.9) # Means range between 0.1 and 0.9
# Create population
population <- unlist(mapply(function(size, mean) {
rbinom(size, 1, mean)
}, size = strata_sizes, mean = strata_means))
strata_labels <- rep(1:num_strata, strata_sizes)
# Create population data frame
population_df <- data.frame(
value = population,
strata = strata_labels
)
# Step 2: Simulate Stratified Random Samples
n_samples <- 1000 # Sample size per poll
n_polls <- 5000 # Number of simulated polls
stratified_samples <- replicate(n_polls, {
lapply(1:num_strata, function(stratum) {
stratum_data <- population_df$value[population_df$strata == stratum]
sample(stratum_data, size = round(n_samples * (strata_sizes[stratum] / population_size)))
})
}, simplify = FALSE)
# Step 3: Analyze Polls
calculate_ci <- function(mean, se, level = 0.95) {
z <- qnorm(1 - (1 - level) / 2)
c(mean - z * se, mean + z * se)
}
analyze_poll <- function(samples, treat_as_srs = TRUE) {
poll_estimates <- sapply(samples, function(sample) {
if (treat_as_srs) {
mean(unlist(sample)) # Treat as simple random sample
} else {
# Stratified mean calculation
sapply(1:num_strata, function(stratum) {
mean(sample[[stratum]]) * (strata_sizes[stratum] / population_size)
}) %>% sum()
}
})
poll_cis <- t(sapply(1:n_polls, function(i) {
mean_poll <- poll_estimates[i]
se <- if (treat_as_srs) {
sqrt(mean_poll * (1 - mean_poll) / n_samples) # Proper SE for SRS
} else {
sqrt(sum(sapply(1:num_strata, function(stratum) {
(mean(samples[[i]][[stratum]]) * (1 - mean(samples[[i]][[stratum]])) /
round(n_samples * (strata_sizes[stratum] / population_size))) * (strata_sizes[stratum] / population_size)^2
}))) # Proper SE for stratified sampling
}
calculate_ci(mean_poll, se)
}))
coverage <- mean(sapply(1:n_polls, function(i) {
paired_poll <- sample(1:n_polls, 1) # Randomly pair polls
poll_estimates[paired_poll] >= poll_cis[i, 1] & poll_estimates[paired_poll] <= poll_cis[i, 2]
}))
list(estimates = poll_estimates, ci = poll_cis, coverage = coverage)
}
# Analyze as SRS
srs_analysis <- analyze_poll(stratified_samples, treat_as_srs = TRUE)
# Analyze as Stratified
stratified_analysis <- analyze_poll(stratified_samples, treat_as_srs = FALSE)
# Results
cat("Coverage (SRS):", srs_analysis$coverage, "\n")
cat("Coverage (Stratified):", stratified_analysis$coverage, "\n")
#> cat("Coverage (SRS):", srs_analysis$coverage, "\n")
#Coverage (SRS): 0.9052
#> cat("Coverage (Stratified):", stratified_analysis$coverage, "\n")
#Coverage (Stratified): 0.8284
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment