Last active
December 7, 2018 20:05
-
-
Save bbdaniels/774d5e5e31f32b74ec91bcb914453ae1 to your computer and use it in GitHub Desktop.
Power calculations by simulation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Want to know the minimum detectable effect size? | |
// Explore power for different treatment effect sizes with a given sample size. | |
clear | |
set matsize 5000 | |
cap mat drop results | |
qui forval te = 0(0.1)1 { // Loop over treatment effect sizes (te) of 0 to 1 standard deviations at increments of 0.1 SDs | |
forval i=1/1000 { // Running this loop 1000 times | |
clear | |
set obs 1000 // Set sample size to 1000 | |
gen e=rnormal() // Include a normally distributed error term | |
gen t=rnormal()>0 // Randomly assign treatment to half the population | |
gen y=1+`te'*t+e // Set functional form for DGP (here we have some constant 1 plus the treatment effect times the positive normal dist plus error term | |
reg y t // Run the regression for each loop and store the results in matrix below | |
mat a = r(table) | |
mat a = a[....,1] | |
mat results = nullmat(results) \ a' , [`te'] | |
} | |
} | |
// Load the results into data | |
clear | |
svmat results , n(col) | |
// Analyze all the regressions we ran | |
gen sig = p <0.05 | |
graph bar sig , over(c10) yline(0.8) //assuming we need power of 0.8, the graph shows for each treatment effect size (values on x-axis) the percent of times that the regression had significant results i.e. where p<0.05 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Want to know the minimum sample size? | |
// Dxplore power for different sample sizes with a given treatment effect size | |
clear | |
set matsize 5000 | |
cap mat drop results | |
qui forval ss = 100(100)1000 { // Loop over sample sizes (ss) 100 to 1000 at increments of 100 | |
forval i=1/100 { // Running this loop 1000 times | |
clear | |
set obs `ss' | |
gen e=rnormal() // Include a normally distributed error term | |
gen t=rnormal()>0 // Randomly assign treatment to half the population | |
gen y=1+0.2*t+e // Set the functional form for DGP. Here we are assuming the treatment effect size is 0.2 standard deviations | |
reg y t | |
mat a = r(table) | |
mat a = a[....,1] | |
mat results = nullmat(results) \ a' , [`ss'] | |
} | |
} | |
// Load the results into data | |
clear | |
svmat results , n(col) | |
// Analyze all the regressions we ran | |
gen sig = p <0.05 | |
graph bar sig , over(c10) yline(0.8) // Assuming we need power of 0.8, the graph shows for each sample size (values on x-axis) the percent of times that the regression had significant results i.e. where p<0.05 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// What if you have multiple treatment arms? | |
// With three treatment arms, explore power for different sample sizes with a given treatment effect size | |
clear | |
set matsize 5000 | |
cap mat drop results | |
qui forval ss = 100(100)1000 { // Loop over sample sizes (ss) 100 to 1000 at increments of 100 | |
forval i=1/100 { // Run this loop 1000 times | |
clear | |
set obs `ss' | |
gen e=rnormal() // Include a normally distributed error term | |
xtile t = rnormal() ,n(4) // Shortcut to create three treatment arms | |
tab t, gen(t) // Indicators for each treatment arm (1 = control) | |
gen y=1+.2*t2 +.1*t3 +0*t4 +e //assuming first treatment arm has treatment effect of 0.2 std devs, second arm has treat effect 0.1 std devs, and third arm has treat effect 0 std devs | |
reg y t2 t3 t4 | |
// Note the changes here in storing the data are specific to three arms: | |
mat a = r(table) | |
mat a = a[....,1..3] | |
mat results = nullmat(results) \ a' , [`ss'\ `ss'\ `ss'], [1\ 2\ 3] | |
} | |
} | |
clear | |
svmat results , n(col) | |
gen sig = p <0.05 | |
graph bar sig, over(c10) over(c11) yline(0.8) //assuming we need power of 0.8, for each of the three treatment arms the graphs show for each sample size (values on x-axis) the percent of times that the regression had significant results i.e. where p<0.05 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment