Skip to content

Instantly share code, notes, and snippets.

@jwbowers
Last active April 17, 2020 17:14
Show Gist options
  • Save jwbowers/030c7dd78fdcd82d1582198b5eab9f54 to your computer and use it in GitHub Desktop.
Save jwbowers/030c7dd78fdcd82d1582198b5eab9f54 to your computer and use it in GitHub Desktop.
A simple sampling design for declare design
## We have many group homes (like nursing homes) and few COVID tests.
## We want to test a simple random sample within each home to estimate the proportion positive with COVID (or detect
## any COVID)
## So, each place has a fixed population (N) and we will sample from this place.
## The population should be unique for combination of N and covidprob (which is just .02 in the simulations, but higher for
## testing here).
## I'd like to use the nice DeclareDesign functions like redesign() to assess
## power to detect COVID given different proportions of sampling for each population size
pop_fn <- function(N,covidprob){
## each N and covidprob should produce the same population each time
## The population of the nursing home is fixed.
#set.seed(12345)
#fixed_pop <- fabricate(N=N,covidprob=covidprob,covid=rbinom(N,size=1,prob=covidprob))
fixed_pop <- data.frame(covid=rep(c(0, 1), c(N*(1-covidprob), N*covidprob)))
return(fixed_pop)
}
pop <- declare_population(handler=pop_fn,N,covidprob)
custom_sampler <- function(data,prop_samp){
samp_dat <- sample_n(data,floor(nrow(data)*prop_samp))
## covid column records true positive versus negative
## use a 10% error rate to chance positives into negatives
flip <- rbinom(n=sum(samp_dat$covid),size=1,prob=.1)
## covid_detected column records what our tests tell us (currently missing 10% of positives)
samp_dat$covid_detected <- with(samp_dat,ifelse(covid==1,(1-flip)*covid[covid==1],0))
## Make an finite population correction column for use of svydesign, svyciprop, etc..
samp_dat$fpc <- rep(prop_samp,nrow(samp_dat))
return(samp_dat)
}
samp <- declare_sampling(handler = custom_sampler, prop_samp=prop_samp)
## The estimand is simply the proportion of people positive for covid in the facility (of a given size N)
estimand <- declare_estimand(mean(covid),label="prop covid")
estimator3 <- declare_estimator(covid_detected~1,model=lm_robust, se_type="HC2", label="lm_err")
dtest_template <- pop + estimand + samp + estimator3
dtest <- redesign(dtest_template,
N=200, ## 200 person place
prop_samp=.5, ## sample .5
covidprob = .02) ## .02 positive covid
## Population is fixed so estimands should not vary acros samples
> draw_estimands(dtest)
estimand_label estimand
1 prop covid 0.03
> draw_estimands(dtest)
estimand_label estimand
1 prop covid 0.03
> dtest_dat1 <- draw_data(dtest)
> dtest_dat2 <- draw_data(dtest)
> stopifnot(!all.equal(dtest_dat1,dtest_dat2))
Error: !all.equal(dtest_dat1, dtest_dat2) is not TRUE
> draw_estimates(dtest)
estimator_label term estimate std.error statistic p.value conf.low conf.high df outcome
1 lm_err (Intercept) 0.02 0.01407053 1.421411 0.1583399 -0.007918983 0.04791898 99 covid_detected
> draw_estimates(dtest)
estimator_label term estimate std.error statistic p.value conf.low conf.high df outcome
1 lm_err (Intercept) 0.02 0.01407053 1.421411 0.1583399 -0.007918983 0.04791898 99 covid_detected
>
#### OLD STUFF
> N <- 40
> covidprob <- .5
> n <- 20
> pop <- declare_population(N, covid = rbinom(N, size = 1, prob = covidprob))
> fixed_pop <- pop()
> design_fixed <-
+ declare_population(data = fixed_pop) +
+ declare_sampling(n=n) +
+ declare_estimand(mean(covid),label="prop covid")
> draw_estimands(design_fixed)
estimand_label estimand
1 prop covid 0.55
> draw_estimands(design_fixed)
estimand_label estimand
1 prop covid 0.6
> draw_estimands(design_fixed)
estimand_label estimand
1 prop covid 0.65
> draw_estimands(design_fixed)
estimand_label estimand
1 prop covid 0.7
##### OLD STUFF
> pop <- declare_population(N,
+ covid=rbinom(N,size=1,prob=covidprob))
> samp <- declare_sampling(n=n)
> sim_design <- pop + samp
> set.seed(12345)
> tmp <- draw_data(redesign(sim_design,N=40,n=20,covidprob=.5))
> table(tmp$covid)
0 1
9 11
> stopifnot( abs( mean(tmp$covid) - .5 ) < .1)
> estimand <- declare_estimand(mean(covid),label="prop covid")
> estimator1 <- declare_estimator(covid~1,model=lm_robust, se_type="HC0", label="prop positive")
> estimator2 <- declare_estimator(,label="prop pos with error")
> sim_design_est <- sim_design + estimand + estimator1 ## + estimator2
> d_N40_n20_p5 <- redesign(sim_design_est,N=40,n=20,covidprob=.5)
> draw_estimands(d_N40_n20_p5)
estimand_label estimand
1 prop covid 0.6
> draw_estimands(d_N40_n20_p5)
estimand_label estimand
1 prop covid 0.5
> draw_estimands(d_N40_n20_p5)
estimand_label estimand
1 prop covid 0.4
> draw_estimates(d_N40_n20_p5)
estimator_label term estimate std.error statistic p.value conf.low conf.high df outcome
1 prop positive (Intercept) 0.4 0.1095445 3.651484 0.001697388 0.1707207 0.6292793 19 covid
>
@jwbowers
Copy link
Author

A fix from Alex Coppock that seems to work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment