This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from statsmodels.stats.proportion import proportions_ztest | |
# Sample data for combined_data (with model_type and converted columns).
# Three parallel columns of 18 entries each:
#   lead_score - numeric score assigned to each lead
#   converted  - 0/1 flag (presumably 1 = the lead converted)
#   model_type - first 9 rows are 'rule_based', last 9 are 'ml_based'
data = {
    'lead_score': [85, 70, 60, 45, 90, 75, 80, 55, 65, 78, 85, 68, 72, 90, 82, 75, 70, 88],
    'converted': [1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1],
    'model_type': ['rule_based'] * 9 + ['ml_based'] * 9,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from statsmodels.stats.proportion import proportions_ztest | |
# Separate data for rule_based and ml_based model types.
# Each group is filtered on 'model_type' and then randomly down-sampled to its
# own size; random_state=1 makes the draw reproducible between runs.
# NOTE(review): combined_data, sample_size_rule and sample_size_ml are not
# defined in this snippet and must already exist when these lines run.
# NOTE(review): DataFrame.sample draws without replacement by default, so a
# sample size larger than the group raises ValueError - confirm the sizes fit.
rule_data = combined_data[combined_data['model_type'] == 'rule_based'].sample(
    n=sample_size_rule, random_state=1
)
ml_data = combined_data[combined_data['model_type'] == 'ml_based'].sample(
    n=sample_size_ml, random_state=1
)

# Calculate number of conversions and total leads for each model type
# (summing the 0/1 'converted' column counts the converted leads).
rule_converted = rule_data['converted'].sum()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from statsmodels.stats.power import TTestIndPower | |
# Define conversion rates and desired effect size
baseline_conversion_rate = 0.1
# NOTE(review): TTestIndPower.solve_power interprets effect_size as a
# standardized effect (difference in means divided by the standard deviation).
# If 0.1 is meant as an absolute lift in conversion rate, it should be
# converted first (e.g. with statsmodels' proportion_effectsize) - confirm.
desired_effect_size = 0.1
alpha = 0.05  # presumably the significance level passed to solve_power
power = 0.8  # presumably the target statistical power passed to solve_power
# Calculate required sample size per group
number_of_leads_per_group = TTestIndPower().solve_power( |