public
anonymous / repeat_testing.pl
Created

Basic illustration, in Perl, of the repeated significance testing error

  • Download Gist
repeat_testing.pl
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
#!/usr/bin/perl
 
use strict;
use warnings;
use Statistics::Gtest;
use Statistics::Distributions;
use Data::Dumper;
 
#
# Just set up some basics
#
 
# How many variants to compare, and how many participants to simulate.
# Participants are assigned to variants at random, so $SAMPLE_SIZE is the
# expected (not exact) number of participants per variant.
my $VARIANTS = 2;
my $SAMPLE_SIZE = 100_000;
my $PARTICIPANTS = $SAMPLE_SIZE * $VARIANTS;

# A significance test needs something to compare against.
die "Need at least 2 variants to run a G-test\n" if $VARIANTS < 2;

# Conversion rate of mock experiments, in percent
my $CONVERSION_RATE = 3;

# Chi-square critical values that the G statistic is compared against.
# The observations form a $VARIANTS x 2 table (hits, fails), which has
# ($VARIANTS - 1) * (2 - 1) degrees of freedom. The cutoffs are derived from
# the distribution instead of being hard-coded, so they stay correct when
# $VARIANTS is changed (for 1 degree of freedom they are roughly 2.7055 and
# 3.8415).
my $GTEST_DF = $VARIANTS - 1;
my $GTEST_CUTOFF_90 = Statistics::Distributions::chisqrdistr($GTEST_DF, 0.10); # This means significance at .10
my $GTEST_CUTOFF_95 = Statistics::Distributions::chisqrdistr($GTEST_DF, 0.05); # This means significance at .05

# Number of mock experiments to run
my $EXPERIMENTS = 1000;

# How many participants must have been seen before checking starts
my $CHECK_CUTOFF = 1000;
 
 
#
# Counts
#
 
# Tallies accumulated over all experiments.
#   $totalsNN     - experiments where at least one of the repeated checks
#                   reached the NN% cutoff
#   $only_finalNN - experiments where the single check made after the last
#                   participant reached the NN% cutoff
my ($totals90, $totals95) = (0, 0);
my ($only_final90, $only_final95) = (0, 0);

# Announce the simulation parameters before the (long) run starts.
print "Running false positive checks with these metrics;\n"
    . "Sample size: $SAMPLE_SIZE\n"
    . "Variants: $VARIANTS\n"
    . "Conversion rate: $CONVERSION_RATE\n"
    . "Experiments: $EXPERIMENTS\n";
 
 
#
# This is where the experiments are run.
#
 
# Run $EXPERIMENTS mock A/A experiments. Every variant converts at the same
# $CONVERSION_RATE, so any "significant" G-test result is a false positive.
# For each experiment we record both whether ANY of the repeated checks was
# significant and whether the single check at the very end was.
for my $experiment (1 .. $EXPERIMENTS) {

    # One [hits, fails] pair of counters per variant.
    my @variant = map { [ 0, 0 ] } 1 .. $VARIANTS;

    # Number of repeated checks in this experiment that reached each cutoff.
    my $finding90 = 0;
    my $finding95 = 0;

    #
    # One pretend experiment with $PARTICIPANTS participants and the same
    # $CONVERSION_RATE in each variant, randomly assigning each participant
    # to a variant at the time they are seen.
    #

    # Counting from 1 so that exactly $PARTICIPANTS participants are
    # simulated (0 .. $PARTICIPANTS would simulate one too many).
    for my $participant (1 .. $PARTICIPANTS) {
        my $var = int rand($VARIANTS);

        # rand(100) is uniform on [0, 100), so a strict "<" yields a hit with
        # probability of exactly $CONVERSION_RATE percent.
        if (rand(100) < $CONVERSION_RATE) {
            $variant[$var]->[0]++;    # hits
        }
        else {
            $variant[$var]->[1]++;    # fails
        }

        # Do not start peeking until $CHECK_CUTOFF participants were seen.
        next if $participant < $CHECK_CUTOFF;
        # next unless int rand(100) == 5;

        # Peek: re-test significance on the data collected so far.
        my $g_so_far = Statistics::Gtest->new(\@variant)->getG();

        $finding90++ if $g_so_far >= $GTEST_CUTOFF_90;
        $finding95++ if $g_so_far >= $GTEST_CUTOFF_95;
    }

    # The single test a disciplined experimenter would run: once, at the end.
    my $g_final = Statistics::Gtest->new(\@variant)->getG();

    $only_final90++ if $g_final >= $GTEST_CUTOFF_90;
    $only_final95++ if $g_final >= $GTEST_CUTOFF_95;

    # An experiment counts as a (false) hit if any peek was significant.
    $totals90++ if $finding90;
    $totals95++ if $finding95;

    # Values are passed as printf arguments rather than interpolated into the
    # format string; the printed text is unchanged.
    printf(
        'Experiment %4s of %s: (%6s at p<.05) (%6s at p<0.10). '
            . 'Hitrate: %s of %s at p<0.10, %s of %s at p<0.05. '
            . "(One check: %s of %s at p<0.1, %s of %s at p<0.05) \n",
        $experiment,   $EXPERIMENTS,
        $finding95,    $finding90,
        $totals90,     $experiment,
        $totals95,     $experiment,
        $only_final90, $experiment,
        $only_final95, $experiment,
    );

}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.