Skip to content

Instantly share code, notes, and snippets.

@stasSajinDD

stasSajinDD/.py Secret

Created February 20, 2024 15:19
Show Gist options
  • Save stasSajinDD/80da3e0b9946fe35c3123496f6a65ec4 to your computer and use it in GitHub Desktop.
Save stasSajinDD/80da3e0b9946fe35c3123496f6a65ec4 to your computer and use it in GitHub Desktop.
dilution_example
from scipy.stats import ttest_ind, norm
from statsmodels.stats.power import TTestIndPower
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
# Set parameters for the simulation
effect_size_for_80_power = 0.177 # Maps to 80% power
std_dev = 1
alpha = 0.05
n_users = 500
dilution_levels = list(range(0, 96, 5))
def calculate_power_with_dilution(dilution_percent, effect_size, n_users, std_dev, alpha):
n_treatable = int(n_users * (1 - dilution_percent / 100))
n_not_treatable = n_users - n_treatable
# Generate data for treatable users
control_group_treatable = norm.rvs(size=n_treatable, loc=0, scale=std_dev)
treatment_group_treatable = norm.rvs(size=n_treatable, loc=effect_size, scale=std_dev)
# Generate data for non-treatable users (no effect)
control_group_not_treatable = norm.rvs(size=n_not_treatable, loc=0, scale=std_dev)
treatment_group_not_treatable = norm.rvs(size=n_not_treatable, loc=0, scale=std_dev)
# Combine data
control_group = np.concatenate((control_group_treatable, control_group_not_treatable))
treatment_group = np.concatenate((treatment_group_treatable, treatment_group_not_treatable))
# Perform t-test
_, p_value = ttest_ind(treatment_group, control_group)
is_significant = p_value < alpha
return is_significant
def calculate_sample_size(dilution_percent, effect_size, n_users, std_dev, alpha, n_simulations=1000):
required_n = []
for dilution in dilution_levels:
significant_results = 0
for _ in range(n_simulations):
is_significant = calculate_power_with_dilution(dilution, effect_size, n_users, std_dev, alpha)
if is_significant:
significant_results += 1
if dilution > 0:
effect_size_diluted = effect_size * (1-dilution/100)
else:
effect_size_diluted = effect_size
# Estimate required sample size to achieve 80% power at this dilution level
analysis = TTestIndPower()
n_required_per_group = analysis.solve_power(effect_size=effect_size_diluted, alpha=alpha, power=0.8, alternative='two-sided')
required_n.append(n_required_per_group * 2)
return required_n
required_n_per_group = calculate_sample_size(dilution_levels, effect_size_for_80_power, n_users, std_dev, alpha)
plt.figure(figsize=(12, 8))
plt.plot(dilution_levels, required_n_per_group, marker='x', linestyle='-', color='green', markersize=10, linewidth=2)
plt.title('Required Sample Size to Maintain 80% Power', fontsize=20, fontweight='bold')
plt.xlabel('Dilution Level (%)', fontsize=16)
plt.ylabel('Required N (log scale)', fontsize=16)
plt.xticks(dilution_levels, fontsize=16)
plt.yticks(required_n_per_group, fontsize=16)
plt.yscale('log')
ax = plt.gca()
ax.yaxis.set_major_locator(ticker.LogLocator(base=10, numticks=15))
ax.yaxis.set_minor_locator(ticker.LogLocator(base=10, subs='auto', numticks=15))
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x) if x >= 1 else '0'))
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment