bbdaniels/minimum-detectable-effect.do

## minimum-detectable-effect.do
// Want to know the minimum detectable effect size?
// Explore power for different treatment effect sizes with a given sample size.

clear
set matsize 5000
cap mat drop results
qui forval te = 0(0.1)1 { // Loop over treatment effect sizes (te) of 0 to 1 standard deviations at increments of 0.1 SDs

	forval i=1/1000 { // Running this loop 1000 times
		clear
		set obs 1000 // Set sample size to 1000
		gen e=rnormal() // Include a normally distributed error term
		gen t=rnormal()>0 // Randomly assign treatment to half the population
		gen y=1+`te'*t+e // Set functional form for DGP (here we have some constant 1 plus the treatment effect times the positive normal dist plus error term
		reg y t // Run the regression for each loop and store the results in matrix below

		mat a = r(table)
		mat a = a[....,1]
		mat results = nullmat(results) \ a' , [`te']

		}
}

// Load the results into data
clear
svmat results , n(col)

// Analyze all the regressions we ran
gen sig = p <0.05
graph bar sig , over(c10) yline(0.8) //assuming we need power of 0.8, the graph shows for each treatment effect size (values on x-axis) the percent of times that the regression had significant results i.e. where p<0.05

## minimum-sample-size.do
// Want to know the minimum sample size?
// Dxplore power for different sample sizes with a given treatment effect size

clear
set matsize 5000
cap mat drop results
qui forval ss = 100(100)1000 { // Loop over sample sizes (ss) 100 to 1000 at increments of 100

	forval i=1/100 { // Running this loop 1000 times
		clear
		set obs `ss'
		gen e=rnormal() // Include a normally distributed error term
		gen t=rnormal()>0 // Randomly assign treatment to half the population
		gen y=1+0.2*t+e // Set the functional form for DGP. Here we are assuming the treatment effect size is 0.2 standard deviations
		reg y t

		mat a = r(table)
		mat a = a[....,1]
		mat results = nullmat(results) \ a' , [`ss']

		}
}

// Load the results into data
clear
svmat results , n(col)

// Analyze all the regressions we ran
gen sig = p <0.05
graph bar sig , over(c10) yline(0.8) // Assuming we need power of 0.8, the graph shows for each sample size (values on x-axis) the percent of times that the regression had significant results i.e. where p<0.05

## multiple-treatment-arms.do
// What if you have multiple treatment arms?
// With three treatment arms, explore power for different sample sizes with a given treatment effect size

clear
set matsize 5000
cap mat drop results
qui forval ss = 100(100)1000 { // Loop over sample sizes (ss) 100 to 1000 at increments of 100

	forval i=1/100 { // Run this loop 1000 times
		clear
		set obs `ss'
		gen e=rnormal() // Include a normally distributed error term
		xtile t = rnormal() ,n(4) // Shortcut to create three treatment arms
		tab t, gen(t) // Indicators for each treatment arm (1 = control)
		gen y=1+.2*t2 +.1*t3 +0*t4 +e //assuming first treatment arm has treatment effect of 0.2 std devs, second arm has treat effect 0.1 std devs, and third arm has treat effect 0 std devs
		reg y t2 t3 t4

		// Note the changes here in storing the data are specific to three arms:
		mat a = r(table)
		mat a = a[....,1..3]
		mat results = nullmat(results) \ a' , [`ss'\ `ss'\ `ss'], [1\ 2\ 3]

		}
}

clear
svmat results , n(col)

gen sig = p <0.05
graph bar sig, over(c10) over(c11) yline(0.8) //assuming we need power of 0.8, for each of the three treatment arms the graphs show for each sample size (values on x-axis) the percent of times that the regression had significant results i.e. where p<0.05
	// Want to know the minimum detectable effect size?
	// Explore power for different treatment effect sizes with a given sample size.

	clear
	set matsize 5000
	cap mat drop results
	qui forval te = 0(0.1)1 { // Loop over treatment effect sizes (te) of 0 to 1 standard deviations at increments of 0.1 SDs

	forval i=1/1000 { // Running this loop 1000 times
	clear
	set obs 1000 // Set sample size to 1000
	gen e=rnormal() // Include a normally distributed error term
	gen t=rnormal()>0 // Randomly assign treatment to half the population
	gen y=1+`te'*t+e // Set functional form for DGP (here we have some constant 1 plus the treatment effect times the positive normal dist plus error term
	reg y t // Run the regression for each loop and store the results in matrix below

	mat a = r(table)
	mat a = a[....,1]
	mat results = nullmat(results) \ a' , [`te']

	}
	}

	// Load the results into data
	clear
	svmat results , n(col)

	// Analyze all the regressions we ran
	gen sig = p <0.05
	graph bar sig , over(c10) yline(0.8) //assuming we need power of 0.8, the graph shows for each treatment effect size (values on x-axis) the percent of times that the regression had significant results i.e. where p<0.05
	// Want to know the minimum sample size?
	// Dxplore power for different sample sizes with a given treatment effect size

	clear
	set matsize 5000
	cap mat drop results
	qui forval ss = 100(100)1000 { // Loop over sample sizes (ss) 100 to 1000 at increments of 100

	forval i=1/100 { // Running this loop 1000 times
	clear
	set obs `ss'
	gen e=rnormal() // Include a normally distributed error term
	gen t=rnormal()>0 // Randomly assign treatment to half the population
	gen y=1+0.2*t+e // Set the functional form for DGP. Here we are assuming the treatment effect size is 0.2 standard deviations
	reg y t

	mat a = r(table)
	mat a = a[....,1]
	mat results = nullmat(results) \ a' , [`ss']

	}
	}

	// Load the results into data
	clear
	svmat results , n(col)

	// Analyze all the regressions we ran
	gen sig = p <0.05
	graph bar sig , over(c10) yline(0.8) // Assuming we need power of 0.8, the graph shows for each sample size (values on x-axis) the percent of times that the regression had significant results i.e. where p<0.05
	// What if you have multiple treatment arms?
	// With three treatment arms, explore power for different sample sizes with a given treatment effect size

	clear
	set matsize 5000
	cap mat drop results
	qui forval ss = 100(100)1000 { // Loop over sample sizes (ss) 100 to 1000 at increments of 100

	forval i=1/100 { // Run this loop 1000 times
	clear
	set obs `ss'
	gen e=rnormal() // Include a normally distributed error term
	xtile t = rnormal() ,n(4) // Shortcut to create three treatment arms
	tab t, gen(t) // Indicators for each treatment arm (1 = control)
	gen y=1+.2t2 +.1t3 +0*t4 +e //assuming first treatment arm has treatment effect of 0.2 std devs, second arm has treat effect 0.1 std devs, and third arm has treat effect 0 std devs
	reg y t2 t3 t4

	// Note the changes here in storing the data are specific to three arms:
	mat a = r(table)
	mat a = a[....,1..3]
	mat results = nullmat(results) \ a' , [`ss'\ `ss'\ `ss'], [1\ 2\ 3]

	}
	}

	clear
	svmat results , n(col)

	gen sig = p <0.05
	graph bar sig, over(c10) over(c11) yline(0.8) //assuming we need power of 0.8, for each of the three treatment arms the graphs show for each sample size (values on x-axis) the percent of times that the regression had significant results i.e. where p<0.05