Skip to content

Instantly share code, notes, and snippets.

@BERENZ
Created April 11, 2024 11:11
Show Gist options
  • Save BERENZ/f6adf3b3c19f39521af742032d091b01 to your computer and use it in GitHub Desktop.
Save BERENZ/f6adf3b3c19f39521af742032d091b01 to your computer and use it in GitHub Desktop.
Przykład z overlap
library(nonprobsvy)
seed_for_sim <- 2024
set.seed(seed_for_sim)
## pop and sample sizes
N <- 100000 ## populacja
n_a <- N*0.7 ## proba big data o wielkosci 70%
n_b <- 10000 ## duza proba losowa
n_a1 <- 0.7 * n_a
n_a2 <- 0.3 * n_a
## generate data
x <- rnorm(N, 2, 1)
z <- x + max(x)
e <- rnorm(N)
y1 <- 1 + 2*x + e
y2 <- 3 + x + 2*e
y3 <- 2.5 + 0.5*x^2 +e
strata <- x <= 2
pop <- data.frame(x, z, pi_z = n_b*z/sum(z), y1, y2, y3, strata)
res <- list()
nsims <- 500
## nonprob sample
for (k in 1:nsims) {
if (k %% 50 == 0) print(k)
set.seed(k)
pop1 <- subset(pop, strata == TRUE)
pop2 <- subset(pop, strata == FALSE)
sample_a_500 <- rbind(pop1[sample(1:nrow(pop1), n_a1[1]), ],
pop2[sample(1:nrow(pop2), n_a2[1]), ])
## sample prob
sample_b <- pop[sample(1:N, n_b),]
sample_b$w_b <- N/n_b
svy_b <- svydesign(ids= ~1, weights = ~ w_b, data = sample_b)
trues <- colMeans(pop[, c("y1", "y2", "y3")])
naive_500 <- colMeans(sample_a_500[, c("y1", "y2", "y3")])
mi_glm_500 <- nonprob(outcome = y1 + y2 + y3 ~ x,
data = sample_a_500, svydesign = svy_b,
se = TRUE,
method_outcome = "nn")
res[[k]] <- data.frame(k = k,
y = c("y1","y2", "y3"),
trues = trues,
naive_5 = naive_500,
glm_5 = mi_glm_500$output$mean,
glm_500_ci = as.numeric(mi_glm_500$confidence_interval[1] < trues &
mi_glm_500$confidence_interval[2] > trues)
)
}
## wyniki
res_df <- rbindlist(res)
results_simulation1_process <- res_df |> melt(id.vars = 1:3)
results_simulation1_process[, c("est", "type", "ci"):=tstrsplit(variable, "_")]
## przedzial ufnosci bliskie 100% zamiast 95%
results_simulation1_process[!is.na(ci) , .(ci=mean(value)), .(type, est, y)]
## obciazenia nie ma ale blad standrdowy jest za duży co powoduje, ze pokrycie CI jest bliskie 100%
results_simulation1_process[is.na(ci), .(bias=mean(value)-mean(trues), se = sd(value),
rmse = sqrt((mean(value)-mean(trues))^2 + var(value))), .(type, est, y)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment