Last active
April 17, 2020 17:14
-
-
Save jwbowers/030c7dd78fdcd82d1582198b5eab9f54 to your computer and use it in GitHub Desktop.
A simple sampling design for declare design
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## We have many group homes (like nursing homes) and few COVID tests. | |
## We want to test a simple random sample within each home to estimate the proportion positive with COVID (or detect | |
## any COVID) | |
## So, each place has a fixed population (N) and we will sample from this place. | |
## The population should be unique for combination of N and covidprob (which is just .02 in the simulations, but higher for | |
## testing here). | |
## I'd like to use the nice DeclareDesign functions like redesign() to assess | |
## power to detect COVID given different proportions of sampling for each population size | |
pop_fn <- function(N,covidprob){ | |
## each N and covidprob should produce the same population each time | |
## The population of the nursing home is fixed. | |
#set.seed(12345) | |
#fixed_pop <- fabricate(N=N,covidprob=covidprob,covid=rbinom(N,size=1,prob=covidprob)) | |
fixed_pop <- data.frame(covid=rep(c(0, 1), c(N*(1-covidprob), N*covidprob))) | |
return(fixed_pop) | |
} | |
pop <- declare_population(handler=pop_fn,N,covidprob) | |
custom_sampler <- function(data,prop_samp){ | |
samp_dat <- sample_n(data,floor(nrow(data)*prop_samp)) | |
## covid column records true positive versus negative | |
## use a 10% error rate to chance positives into negatives | |
flip <- rbinom(n=sum(samp_dat$covid),size=1,prob=.1) | |
## covid_detected column records what our tests tell us (currently missing 10% of positives) | |
samp_dat$covid_detected <- with(samp_dat,ifelse(covid==1,(1-flip)*covid[covid==1],0)) | |
## Make an finite population correction column for use of svydesign, svyciprop, etc.. | |
samp_dat$fpc <- rep(prop_samp,nrow(samp_dat)) | |
return(samp_dat) | |
} | |
samp <- declare_sampling(handler = custom_sampler, prop_samp=prop_samp) | |
## The estimand is simply the proportion of people positive for covid in the facility (of a given size N) | |
estimand <- declare_estimand(mean(covid),label="prop covid") | |
estimator3 <- declare_estimator(covid_detected~1,model=lm_robust, se_type="HC2", label="lm_err") | |
dtest_template <- pop + estimand + samp + estimator3 | |
dtest <- redesign(dtest_template, | |
N=200, ## 200 person place | |
prop_samp=.5, ## sample .5 | |
covidprob = .02) ## .02 positive covid | |
## Population is fixed so estimands should not vary acros samples | |
> draw_estimands(dtest) | |
estimand_label estimand | |
1 prop covid 0.03 | |
> draw_estimands(dtest) | |
estimand_label estimand | |
1 prop covid 0.03 | |
> dtest_dat1 <- draw_data(dtest) | |
> dtest_dat2 <- draw_data(dtest) | |
> stopifnot(!all.equal(dtest_dat1,dtest_dat2)) | |
Error: !all.equal(dtest_dat1, dtest_dat2) is not TRUE | |
> draw_estimates(dtest) | |
estimator_label term estimate std.error statistic p.value conf.low conf.high df outcome | |
1 lm_err (Intercept) 0.02 0.01407053 1.421411 0.1583399 -0.007918983 0.04791898 99 covid_detected | |
> draw_estimates(dtest) | |
estimator_label term estimate std.error statistic p.value conf.low conf.high df outcome | |
1 lm_err (Intercept) 0.02 0.01407053 1.421411 0.1583399 -0.007918983 0.04791898 99 covid_detected | |
> | |
#### OLD STUFF | |
> N <- 40 | |
> covidprob <- .5 | |
> n <- 20 | |
> pop <- declare_population(N, covid = rbinom(N, size = 1, prob = covidprob)) | |
> fixed_pop <- pop() | |
> design_fixed <- | |
+ declare_population(data = fixed_pop) + | |
+ declare_sampling(n=n) + | |
+ declare_estimand(mean(covid),label="prop covid") | |
> draw_estimands(design_fixed) | |
estimand_label estimand | |
1 prop covid 0.55 | |
> draw_estimands(design_fixed) | |
estimand_label estimand | |
1 prop covid 0.6 | |
> draw_estimands(design_fixed) | |
estimand_label estimand | |
1 prop covid 0.65 | |
> draw_estimands(design_fixed) | |
estimand_label estimand | |
1 prop covid 0.7 | |
##### OLD STUFF | |
> pop <- declare_population(N, | |
+ covid=rbinom(N,size=1,prob=covidprob)) | |
> samp <- declare_sampling(n=n) | |
> sim_design <- pop + samp | |
> set.seed(12345) | |
> tmp <- draw_data(redesign(sim_design,N=40,n=20,covidprob=.5)) | |
> table(tmp$covid) | |
0 1 | |
9 11 | |
> stopifnot( abs( mean(tmp$covid) - .5 ) < .1) | |
> estimand <- declare_estimand(mean(covid),label="prop covid") | |
> estimator1 <- declare_estimator(covid~1,model=lm_robust, se_type="HC0", label="prop positive") | |
> estimator2 <- declare_estimator(,label="prop pos with error") | |
> sim_design_est <- sim_design + estimand + estimator1 ## + estimator2 | |
> d_N40_n20_p5 <- redesign(sim_design_est,N=40,n=20,covidprob=.5) | |
> draw_estimands(d_N40_n20_p5) | |
estimand_label estimand | |
1 prop covid 0.6 | |
> draw_estimands(d_N40_n20_p5) | |
estimand_label estimand | |
1 prop covid 0.5 | |
> draw_estimands(d_N40_n20_p5) | |
estimand_label estimand | |
1 prop covid 0.4 | |
> draw_estimates(d_N40_n20_p5) | |
estimator_label term estimate std.error statistic p.value conf.low conf.high df outcome | |
1 prop positive (Intercept) 0.4 0.1095445 3.651484 0.001697388 0.1707207 0.6292793 19 covid | |
> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A fix from Alex Coppock that seems to work.