Basic illustration of the repeated significance testing error in Perl
#!/usr/bin/perl
use strict;
use warnings;
use Statistics::Gtest;
use Statistics::Distributions;
use Data::Dumper;

#
# Simulates A/B-style experiments in which every variant has the SAME
# conversion rate, so any "significant" G-test result is a false positive.
# For each experiment the script compares two strategies:
#   * checking significance after every participant (once $CHECK_CUTOFF
#     participants have been seen), and
#   * checking significance only once, at the end of the experiment.
# A running tally of both is printed after each experiment.
#

#
# Just set up some basics
#

# How many variants and participants to include
my $VARIANTS     = 2;
my $SAMPLE_SIZE  = 100_000;
my $PARTICIPANTS = $SAMPLE_SIZE * $VARIANTS;

# The G-test below compares $VARIANTS rows of [hits, fails]; that needs at
# least two rows, and the chi-square lookup needs at least one degree of
# freedom.
die "VARIANTS must be at least 2\n" if $VARIANTS < 2;

# Conversion rate (in percent) of mock experiments
my $CONVERSION_RATE = 3;

# Chi-square critical values, derived from the table shape instead of being
# hard-coded: a ($VARIANTS x 2) table has ($VARIANTS - 1) degrees of freedom.
# (The previously hard-coded 2.7105 / 3.8502 did not match the df=1 critical
# values of roughly 2.7055 / 3.8415, and were only meaningful for 2 variants.)
my $DEGREES_OF_FREEDOM = $VARIANTS - 1;
my $GTEST_CUTOFF_90    # This means significance at .10
    = Statistics::Distributions::chisqrdistr( $DEGREES_OF_FREEDOM, 0.10 );
my $GTEST_CUTOFF_95    # This means significance at .05
    = Statistics::Distributions::chisqrdistr( $DEGREES_OF_FREEDOM, 0.05 );

# Number of mock experiments to run
my $EXPERIMENTS = 1000;

# How many participants to do before starting checking
my $CHECK_CUTOFF = 1000;

#
# Counts (accumulated over all experiments)
#
my $totals90     = 0;    # experiments with >= 1 "significant" check at p<0.10
my $totals95     = 0;    # experiments with >= 1 "significant" check at p<0.05
my $only_final90 = 0;    # experiments "significant" at p<0.10 on the final check
my $only_final95 = 0;    # experiments "significant" at p<0.05 on the final check

# Compute the G statistic for a table given as an array ref of
# [hits, fails] rows, one row per variant.
sub g_statistic {
    my ($table) = @_;
    return Statistics::Gtest->new($table)->getG();
}

print <<"EOT";
Running false positive checks with these metrics;
Sample size: $SAMPLE_SIZE
Variants: $VARIANTS
Conversion rate: $CONVERSION_RATE
Experiments: $EXPERIMENTS
EOT

#
# This is where the experiments are run.
#
for my $experiment ( 1 .. $EXPERIMENTS ) {

    # One [hits, fails] row per variant.
    my @variant = map { [ 0, 0 ] } 1 .. $VARIANTS;

    # Number of intermediate checks that crossed each cutoff.
    my $finding90 = 0;
    my $finding95 = 0;

    #
    # One pretend experiment with $PARTICIPANTS participants and the same
    # $CONVERSION_RATE in each variant, randomly assigning each participant
    # to a variant at time seen.
    #
    # Participants are numbered from 1 so that exactly $PARTICIPANTS are
    # simulated (0..$PARTICIPANTS would simulate one too many) and checking
    # starts once $CHECK_CUTOFF participants have been recorded.
    #
    for my $participant ( 1 .. $PARTICIPANTS ) {
        my $var = int rand($VARIANTS);

        if ( rand(100) <= $CONVERSION_RATE ) {
            $variant[$var]->[0]++;    # hits
        }
        else {
            $variant[$var]->[1]++;    # fails
        }

        next if $participant < $CHECK_CUTOFF;

        # Uncomment to check only on a random ~1% of participants:
        # next unless int rand(100) == 5;

        my $g_value = g_statistic( \@variant );
        $finding90++ if $g_value >= $GTEST_CUTOFF_90;
        $finding95++ if $g_value >= $GTEST_CUTOFF_95;
    }

    # The single "check once at the end" test on the complete data.
    my $final_g = g_statistic( \@variant );
    $only_final90++ if $final_g >= $GTEST_CUTOFF_90;
    $only_final95++ if $final_g >= $GTEST_CUTOFF_95;

    # An experiment counts as a (false) positive under repeated checking if
    # any intermediate check crossed the cutoff.
    $totals90++ if $finding90;
    $totals95++ if $finding95;

    # All values are passed as arguments rather than interpolated into the
    # format string; the printed text is unchanged.
    printf
        "Experiment %4s of %s: (%6s at p<.05) (%6s at p<0.10). Hitrate: %s of %s at p<0.10, %s of %s at p<0.05. (One check: %s of %s at p<0.1, %s of %s at p<0.05) \n",
        $experiment, $EXPERIMENTS, $finding95, $finding90,
        $totals90,     $experiment, $totals95,     $experiment,
        $only_final90, $experiment, $only_final95, $experiment;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment