@BioSciEconomist · Created June 7, 2022
Power Analysis Template for Binary Outcomes
# *-----------------------------------------------------------------
# | PROGRAM NAME: Power Analysis Binary.py
# | DATE: 09/30/21
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:       
# *----------------------------------------------------------------
# | PURPOSE: power calculation and sample size scenarios for binary outcomes
# *----------------------------------------------------------------
 
import pandas as pd
import numpy as np
import statsmodels
import statsmodels.stats.power as smp
import statsmodels.stats.api as sms
from statsmodels.stats.proportion import proportion_effectsize
 
#-------------------------------------
# basic power calculation
#-------------------------------------
 
# full-compliance / TOT (treatment-on-the-treated) scenario: as if everyone who gets the email takes action, regardless of click-through
 
p1 = .25 # baseline outcome proportion
p2 = .30 # this is the observed proportion we need to see in the treatment group to meet our definition of success
 
# effect size
h = sms.proportion_effectsize(p2,p1)
print(h)
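As a sanity check, `proportion_effectsize` computes Cohen's h, the difference of arcsine-square-root transformed proportions, so it can be recomputed by hand with numpy (a minimal check using the same p1 and p2 as above):

```python
import numpy as np
from statsmodels.stats.proportion import proportion_effectsize

p1, p2 = .25, .30
# Cohen's h: difference of arcsine-square-root transformed proportions
h_manual = 2 * (np.arcsin(np.sqrt(p2)) - np.arcsin(np.sqrt(p1)))
h_lib = proportion_effectsize(p2, p1)
print(h_manual, h_lib)  # both ~0.112
```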
 
pwr = .80 # desired power (1 - probability of a type II error)
 
fp = .10 # alpha, our acceptable 'false positive' (type I error) rate
 
 
# Calculate the required sample size
 
# while R has power calculation functions specific to proportions, Python relies on the z-test equivalent via statsmodels
# https://www.statsmodels.org/devel/stats.html
# https://www.statsmodels.org/devel/generated/statsmodels.stats.power.NormalIndPower.html#statsmodels.stats.power.NormalIndPower
 
 
# Parameters and Assumptions
 
# NormalIndPower.solve_power(effect_size=None, nobs1=None, alpha=None, power=None, ratio=1.0, alternative='two-sided')
# solves for any one parameter of the power of a two-sample z-test;
# exactly one argument must be None, all others need numeric values
# effect_size: standardized effect size
# nobs1: number of observations in sample 1. The number of observations in sample 2 is
#   ratio times the size of sample 1, i.e. nobs2 = nobs1 * ratio; ratio can be set to
#   zero to get the power for a one-sample test.
# alpha: float in interval (0, 1); significance level, e.g. 0.05, the probability of a
#   type I error, i.e. a wrong rejection when the null hypothesis is true.
# power: float in interval (0, 1); power of the test, e.g. 0.8, one minus the probability
#   of a type II error, i.e. the probability that the test correctly rejects the null
#   hypothesis when the alternative hypothesis is true.
# ratio: ratio of the number of observations in sample 2 relative to sample 1 (see
#   nobs1). The default is 1; to solve for ratio it must be explicitly set to None.
# alternative: str, 'two-sided' (default), 'larger', or 'smaller'; chooses whether power
#   is calculated for a two-sided (default) or one-sided test.
 
 
result = smp.NormalIndPower().solve_power(effect_size =h,nobs1=None, power=pwr, alpha= fp, alternative='two-sided',
                                          ratio = 1)
 
print('Sample Size Per Group: %.3f' % result)
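To double-check the result, we can round the returned n up to a whole subject count and plug it back into `NormalIndPower().power()`, a round-trip check using the same inputs as above:

```python
import math
import statsmodels.stats.power as smp
import statsmodels.stats.api as sms

h = sms.proportion_effectsize(.30, .25)  # same effect size as above
n = smp.NormalIndPower().solve_power(effect_size=h, nobs1=None, power=.80,
                                     alpha=.10, alternative='two-sided', ratio=1)
n_whole = math.ceil(n)  # always round up to whole subjects
achieved = smp.NormalIndPower().power(effect_size=h, nobs1=n_whole,
                                      alpha=.10, ratio=1, alternative='two-sided')
print(n_whole, achieved)  # 985 per group; achieved power just over .80
```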
 
 
#----------------------------------
# adjustments for non-compliance
#--------------------------------
 
# many recipients will never see or open the email, so only a small share of subjects
# can actually be 'nudged' by the design; the observed difference at the test vs.
# control level will therefore be diluted (i.e. an impact on those who are nudged by
# engaging content will show up as a much smaller effect when comparing the randomized
# test and control groups overall)
 
# for these reasons we have to adjust sample sizes for the expected level of engagement
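The dilution can be sketched numerically (illustrative numbers only; the 5-point lift among engagers is an assumption, not a figure from this analysis):

```python
p_control = 0.25   # baseline outcome proportion
tot_lift = 0.05    # assumed lift among subjects who actually engage (TOT effect)
eng = 0.03         # engagement (click-through) rate
itt_lift = eng * tot_lift  # effect visible in the full test-vs-control comparison
print(itt_lift)    # ~0.0015: a 5-point lift among engagers shows up as 0.15 points
```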
 
# Reference: Friedman LM, Furberg CD, DeMets DL. Fundamentals of Clinical Trials. 3. New York, NY: Springer; 1998.
#  Adjusting sample size to compensate for nonadherence; pp. 107–108
 
# Duflo, Esther, Rachel Glennerster, and Michael Kremer, "Using Randomization in
# Development Economics Research: A Toolkit," National Bureau of Economic Research,
# Technical Working Paper No. 333, 2006.
 
N = 985 # required sample size per group with full engagement
Eng = .03 # expected level of engagement (in this case click-through rates)
Ro = (1- Eng) # expected proportion of dropouts or subjects that fail to engage
Ri = 0 # expected proportion of 'drop ins' or subjects in the control group that somehow get treatment
 
N_ITT = N/((1-Ro-Ri)**2) # adjusted N
print(N_ITT) # sample size given non-compliance
 
# note this is the same as scaling N by 1/k^2, as implied in the Duflo et al. guide to
# field experiments assuming one-sided non-compliance, where k = level of engagement
 
print(N/Eng**2) # identical to N_ITT above
 
 
#-------------------------------------
# sample size scenarios
#-------------------------------------
 
p1 = .25  # baseline outcome
p2 = np.arange(.26, .36, 0.01) # hypothetical range of treatment group proportions
pwr = .80 # desired level of power
fp = .10  # desired false positive rate (alpha)
 
# define effect size function (calculates effect sizes for given treatment and control scenarios)
def effect_sizes(p1,p2):
    return 2 * (np.arcsin(np.sqrt(p2)) - np.arcsin(np.sqrt(p1)))
 
# define sample size function (calculates sample size given effect size 'd' and required power and alpha)
def sample_size(d):
    return smp.NormalIndPower().solve_power(d,nobs1=None, power=pwr, alpha=fp, alternative='two-sided', ratio = 1)
 
d = [effect_sizes(p1,i) for i in p2] # calculate range of effect sizes for given baseline and treatment scenarios
nSizes = [sample_size(i) for i in d] # calculate range of required sample sizes per group
 
 
# adjustment for non-compliance / engagement
 
# define function to adjust for one-sided non-compliance (requires entering expected engagement)
def adj_sizes(eng,Ri,n):
    # eng = level or proportion of engagement
    # Ri = proportion of control group that gets treatment (or similar treatment)
    Ro = (1- eng) # proportion of dropouts or non-engagers in the treatment group
    return n/((1-Ro-Ri)**2) # sample size adjustment
 
 
nSizesAdj = [adj_sizes(.03,0,i) for i in nSizes] # adjusted for engagement level (!!!check manually entered engagement level!!!)
 
 
df = pd.DataFrame(columns=['Control','Treatment','Difference','EffectSize','RequiredNPerGroup','Engagement','AdjustedN'])
df['Control'] = [p1 for i in range(len(d))]
df['Treatment'] = p2
df['Difference'] =  df['Treatment'] - df['Control']
df['EffectSize'] = np.round(d,2)
df['RequiredNPerGroup'] = np.round(nSizes)
df['Engagement'] = .03 # specify expected engagement
df['AdjustedN'] = np.round(nSizesAdj)
print(df) # display the full scenario table (df.head(len(df)) only renders in a notebook)
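As a quick check on the scenario table (rebuilding the same pipeline so the snippet is self-contained), the required sample size should fall sharply as the detectable lift grows, and each adjusted N should equal the unadjusted N divided by engagement squared:

```python
import numpy as np
import statsmodels.stats.power as smp

def effect_sizes(p1, p2):
    # Cohen's h via the arcsine-square-root transform
    return 2 * (np.arcsin(np.sqrt(p2)) - np.arcsin(np.sqrt(p1)))

def sample_size(d, power=.80, alpha=.10):
    return smp.NormalIndPower().solve_power(d, nobs1=None, power=power,
                                            alpha=alpha,
                                            alternative='two-sided', ratio=1)

p2 = np.arange(.26, .36, .01)
nSizes = [sample_size(effect_sizes(.25, p)) for p in p2]
nAdj = [n / .03**2 for n in nSizes]  # one-sided non-compliance, 3% engagement

# the 1-point-lift scenario needs vastly more subjects than the 10-point one
print(round(nSizes[0]), round(nSizes[-1]))
```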
 
 