jwbowers/pop_sampling_dd1.R

## pop_sampling_dd1.R
## We have many group homes (like nursing homes) and few COVID tests.
## We want to test a simple random sample within each home to estimate the proportion positive with COVID (or detect
## any COVID)

## So, each place has a fixed population (N) and we will sample from this place.
## The population should be unique for combination of N and covidprob (which is just .02 in the simulations, but higher for
## testing here).

## I'd like to use the nice DeclareDesign functions like redesign() to assess
## power to detect COVID given different proportions of sampling for each population size


pop_fn <- function(N,covidprob){
    ## each N and covidprob should produce the same population each time
    ## The population of the nursing home is fixed.
    #set.seed(12345)
    #fixed_pop <- fabricate(N=N,covidprob=covidprob,covid=rbinom(N,size=1,prob=covidprob))
    fixed_pop <- data.frame(covid=rep(c(0, 1), c(N*(1-covidprob), N*covidprob)))
    return(fixed_pop)
}


pop <- declare_population(handler=pop_fn,N,covidprob)

custom_sampler <- function(data,prop_samp){
    samp_dat <- sample_n(data,floor(nrow(data)*prop_samp))
    ## covid column records true positive versus negative
    ## use a 10% error rate to chance positives into negatives
    flip <- rbinom(n=sum(samp_dat$covid),size=1,prob=.1)
    ## covid_detected column records what our tests tell us (currently missing 10% of positives)
    samp_dat$covid_detected <- with(samp_dat,ifelse(covid==1,(1-flip)*covid[covid==1],0))
    ## Make an finite population correction column for use of svydesign, svyciprop, etc..
    samp_dat$fpc <- rep(prop_samp,nrow(samp_dat))
    return(samp_dat)
}

samp <- declare_sampling(handler = custom_sampler, prop_samp=prop_samp)

## The estimand is simply the proportion of people positive for covid in the facility (of a given size N)
estimand <- declare_estimand(mean(covid),label="prop covid")

estimator3 <- declare_estimator(covid_detected~1,model=lm_robust, se_type="HC2", label="lm_err")

dtest_template <- pop + estimand + samp + estimator3
dtest <- redesign(dtest_template,
                  N=200, ## 200 person place
                  prop_samp=.5, ## sample .5
                  covidprob = .02) ## .02 positive covid
## Population is fixed so estimands should not vary acros samples
> draw_estimands(dtest)
  estimand_label estimand
1     prop covid     0.03
> draw_estimands(dtest)
  estimand_label estimand
1     prop covid     0.03
> dtest_dat1 <- draw_data(dtest)
> dtest_dat2 <- draw_data(dtest)
> stopifnot(!all.equal(dtest_dat1,dtest_dat2))
Error: !all.equal(dtest_dat1, dtest_dat2) is not TRUE
> draw_estimates(dtest)
  estimator_label        term estimate  std.error statistic   p.value     conf.low  conf.high df        outcome
1          lm_err (Intercept)     0.02 0.01407053  1.421411 0.1583399 -0.007918983 0.04791898 99 covid_detected
> draw_estimates(dtest)
  estimator_label        term estimate  std.error statistic   p.value     conf.low  conf.high df        outcome
1          lm_err (Intercept)     0.02 0.01407053  1.421411 0.1583399 -0.007918983 0.04791898 99 covid_detected
>

#### OLD STUFF
> N <- 40
> covidprob <- .5
> n <- 20
> pop <- declare_population(N, covid = rbinom(N, size = 1, prob = covidprob))
> fixed_pop <- pop()
> design_fixed <-
+ declare_population(data = fixed_pop) +
+ declare_sampling(n=n) +
+ declare_estimand(mean(covid),label="prop covid")
> draw_estimands(design_fixed)
  estimand_label estimand
1     prop covid     0.55
> draw_estimands(design_fixed)
  estimand_label estimand
1     prop covid      0.6
> draw_estimands(design_fixed)
  estimand_label estimand
1     prop covid     0.65
> draw_estimands(design_fixed)
  estimand_label estimand
1     prop covid      0.7


##### OLD STUFF
> pop <- declare_population(N,
+                           covid=rbinom(N,size=1,prob=covidprob))
> samp  <- declare_sampling(n=n)
> sim_design <-  pop + samp
> set.seed(12345)
> tmp <- draw_data(redesign(sim_design,N=40,n=20,covidprob=.5))
> table(tmp$covid)

 0  1
 9 11
> stopifnot( abs( mean(tmp$covid) - .5 ) < .1)
> estimand <- declare_estimand(mean(covid),label="prop covid")
> estimator1 <- declare_estimator(covid~1,model=lm_robust, se_type="HC0", label="prop positive")
> estimator2 <- declare_estimator(,label="prop pos with error")
> sim_design_est <- sim_design + estimand + estimator1 ## + estimator2
> d_N40_n20_p5  <- redesign(sim_design_est,N=40,n=20,covidprob=.5)
> draw_estimands(d_N40_n20_p5)
  estimand_label estimand
1     prop covid      0.6
> draw_estimands(d_N40_n20_p5)
  estimand_label estimand
1     prop covid      0.5
> draw_estimands(d_N40_n20_p5)
  estimand_label estimand
1     prop covid      0.4
> draw_estimates(d_N40_n20_p5)
  estimator_label        term estimate std.error statistic     p.value  conf.low conf.high df outcome
1   prop positive (Intercept)      0.4 0.1095445  3.651484 0.001697388 0.1707207 0.6292793 19   covid
>
	## We have many group homes (like nursing homes) and few COVID tests.
	## We want to test a simple random sample within each home to estimate the proportion positive with COVID (or detect
	## any COVID)

	## So, each place has a fixed population (N) and we will sample from this place.
	## The population should be unique for combination of N and covidprob (which is just .02 in the simulations, but higher for
	## testing here).

	## I'd like to use the nice DeclareDesign functions like redesign() to assess
	## power to detect COVID given different proportions of sampling for each population size


	pop_fn <- function(N,covidprob){
	## each N and covidprob should produce the same population each time
	## The population of the nursing home is fixed.
	#set.seed(12345)
	#fixed_pop <- fabricate(N=N,covidprob=covidprob,covid=rbinom(N,size=1,prob=covidprob))
	fixed_pop <- data.frame(covid=rep(c(0, 1), c(N(1-covidprob), Ncovidprob)))
	return(fixed_pop)
	}


	pop <- declare_population(handler=pop_fn,N,covidprob)

	custom_sampler <- function(data,prop_samp){
	samp_dat <- sample_n(data,floor(nrow(data)*prop_samp))
	## covid column records true positive versus negative
	## use a 10% error rate to chance positives into negatives
	flip <- rbinom(n=sum(samp_dat$covid),size=1,prob=.1)
	## covid_detected column records what our tests tell us (currently missing 10% of positives)
	samp_dat$covid_detected <- with(samp_dat,ifelse(covid==1,(1-flip)*covid[covid==1],0))
	## Make an finite population correction column for use of svydesign, svyciprop, etc..
	samp_dat$fpc <- rep(prop_samp,nrow(samp_dat))
	return(samp_dat)
	}

	samp <- declare_sampling(handler = custom_sampler, prop_samp=prop_samp)

	## The estimand is simply the proportion of people positive for covid in the facility (of a given size N)
	estimand <- declare_estimand(mean(covid),label="prop covid")

	estimator3 <- declare_estimator(covid_detected~1,model=lm_robust, se_type="HC2", label="lm_err")

	dtest_template <- pop + estimand + samp + estimator3
	dtest <- redesign(dtest_template,
	N=200, ## 200 person place
	prop_samp=.5, ## sample .5
	covidprob = .02) ## .02 positive covid
	## Population is fixed so estimands should not vary acros samples
	> draw_estimands(dtest)
	estimand_label estimand
	1 prop covid 0.03
	> draw_estimands(dtest)
	estimand_label estimand
	1 prop covid 0.03
	> dtest_dat1 <- draw_data(dtest)
	> dtest_dat2 <- draw_data(dtest)
	> stopifnot(!all.equal(dtest_dat1,dtest_dat2))
	Error: !all.equal(dtest_dat1, dtest_dat2) is not TRUE
	> draw_estimates(dtest)
	estimator_label term estimate std.error statistic p.value conf.low conf.high df outcome
	1 lm_err (Intercept) 0.02 0.01407053 1.421411 0.1583399 -0.007918983 0.04791898 99 covid_detected
	> draw_estimates(dtest)
	estimator_label term estimate std.error statistic p.value conf.low conf.high df outcome
	1 lm_err (Intercept) 0.02 0.01407053 1.421411 0.1583399 -0.007918983 0.04791898 99 covid_detected
	>

	#### OLD STUFF
	> N <- 40
	> covidprob <- .5
	> n <- 20
	> pop <- declare_population(N, covid = rbinom(N, size = 1, prob = covidprob))
	> fixed_pop <- pop()
	> design_fixed <-
	+ declare_population(data = fixed_pop) +
	+ declare_sampling(n=n) +
	+ declare_estimand(mean(covid),label="prop covid")
	> draw_estimands(design_fixed)
	estimand_label estimand
	1 prop covid 0.55
	> draw_estimands(design_fixed)
	estimand_label estimand
	1 prop covid 0.6
	> draw_estimands(design_fixed)
	estimand_label estimand
	1 prop covid 0.65
	> draw_estimands(design_fixed)
	estimand_label estimand
	1 prop covid 0.7


	##### OLD STUFF
	> pop <- declare_population(N,
	+ covid=rbinom(N,size=1,prob=covidprob))
	> samp <- declare_sampling(n=n)
	> sim_design <- pop + samp
	> set.seed(12345)
	> tmp <- draw_data(redesign(sim_design,N=40,n=20,covidprob=.5))
	> table(tmp$covid)

	0 1
	9 11
	> stopifnot( abs( mean(tmp$covid) - .5 ) < .1)
	> estimand <- declare_estimand(mean(covid),label="prop covid")
	> estimator1 <- declare_estimator(covid~1,model=lm_robust, se_type="HC0", label="prop positive")
	> estimator2 <- declare_estimator(,label="prop pos with error")
	> sim_design_est <- sim_design + estimand + estimator1 ## + estimator2
	> d_N40_n20_p5 <- redesign(sim_design_est,N=40,n=20,covidprob=.5)
	> draw_estimands(d_N40_n20_p5)
	estimand_label estimand
	1 prop covid 0.6
	> draw_estimands(d_N40_n20_p5)
	estimand_label estimand
	1 prop covid 0.5
	> draw_estimands(d_N40_n20_p5)
	estimand_label estimand
	1 prop covid 0.4
	> draw_estimates(d_N40_n20_p5)
	estimator_label term estimate std.error statistic p.value conf.low conf.high df outcome
	1 prop positive (Intercept) 0.4 0.1095445 3.651484 0.001697388 0.1707207 0.6292793 19 covid
	>