Skip to content

Instantly share code, notes, and snippets.

Created June 17, 2012 18:33
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save anonymous/2945361 to your computer and use it in GitHub Desktop.
Save anonymous/2945361 to your computer and use it in GitHub Desktop.
Basic illustration of repeat testing of significance error in Perl
#!/usr/bin/perl
use strict;
use warnings;
use Statistics::Gtest;
use Statistics::Distributions;
use Data::Dumper;
#
# Simulation parameters
#
# Number of variants, and participants per variant, in each mock experiment.
my $VARIANTS = 2;
my $SAMPLE_SIZE = 100_000;
my $PARTICIPANTS = $SAMPLE_SIZE * $VARIANTS;
# Conversion rate of the mock experiments, in percent. Every variant uses the
# same rate, so any "significant" result is by construction a false positive.
my $CONVERSION_RATE = 3;
# The G-test compares variants against each other, and the chi-square lookup
# below needs at least one degree of freedom.
die "Need at least 2 variants, got $VARIANTS\n" if $VARIANTS < 2;
# Critical values of the chi-square distribution that the G statistic is
# compared against. The table is $VARIANTS rows by 2 columns (hits/fails), so
# it has ($VARIANTS - 1) * (2 - 1) degrees of freedom. Looking the values up
# keeps them correct when $VARIANTS is changed (for 2 variants they are
# approximately 2.7055 and 3.8415).
my $DEGREES_OF_FREEDOM = $VARIANTS - 1;
my $GTEST_CUTOFF_90 = Statistics::Distributions::chisqrdistr($DEGREES_OF_FREEDOM, 0.10); # significance at .10
my $GTEST_CUTOFF_95 = Statistics::Distributions::chisqrdistr($DEGREES_OF_FREEDOM, 0.05); # significance at .05
# Number of mock experiments to run
my $EXPERIMENTS = 1000;
# How many participants to record before starting the repeated checks
my $CHECK_CUTOFF = 1000;
#
# Counts, accumulated over all experiments
#
# Experiments in which at least one interim check crossed the cutoff
my $totals90 = 0;
my $totals95 = 0;
# Experiments in which the single check at the very end crossed the cutoff
my $only_final90 = 0;
my $only_final95 = 0;
print <<EOT;
Running false positive checks with these metrics;
Sample size: $SAMPLE_SIZE
Variants: $VARIANTS
Conversion rate: $CONVERSION_RATE
Experiments: $EXPERIMENTS
EOT
#
# This is where the experiments are run.
#
for my $experiment (1..$EXPERIMENTS) {
    # Contingency table for this experiment: one [hits, fails] row per variant.
    my @variant;
    push @variant, [0, 0] for (0..$VARIANTS-1);
    # Number of interim checks in this experiment that crossed each cutoff.
    my $finding90 = 0;
    my $finding95 = 0;
    #
    # One pretend experiment with $PARTICIPANTS participants and the same
    # $CONVERSION_RATE in each variant, randomly assigning each participant to
    # a variant at the time they are seen.
    #
    # Participants are numbered from 1 so that exactly $PARTICIPANTS are
    # simulated and $participant equals the number recorded so far.
    for my $participant (1..$PARTICIPANTS) {
        my $var = int rand( $VARIANTS );
        # Column 0 counts hits, column 1 counts fails. rand(100) is uniform on
        # [0, 100), so "<" converts with probability $CONVERSION_RATE percent.
        my $outcome = rand(100) < $CONVERSION_RATE ? 0 : 1;
        $variant[$var]->[$outcome]++;
        # Do not start peeking until enough participants have been recorded.
        next if $participant < $CHECK_CUTOFF;
        # Uncomment to peek at roughly 1 in 100 participants instead of all:
        # next unless int rand(100) == 5;
        my $g = Statistics::Gtest->new(\@variant);
        my $float = $g->getG();
        $finding90++ if $float>=$GTEST_CUTOFF_90;
        $finding95++ if $float>=$GTEST_CUTOFF_95;
    }
    # The single check a disciplined experimenter would make: once, at the end.
    my $g = Statistics::Gtest->new(\@variant);
    my $float = $g->getG();
    $only_final90++ if $float>=$GTEST_CUTOFF_90;
    $only_final95++ if $float>=$GTEST_CUTOFF_95;
    # An experiment counts as a false positive under repeated checking if any
    # interim check was significant.
    $totals90++ if $finding90;
    $totals95++ if $finding95;
    printf "Experiment %4s of $EXPERIMENTS: (%6s at p<.05) (%6s at p<0.10). Hitrate: $totals90 of $experiment at p<0.10, $totals95 of $experiment at p<0.05. (One check: $only_final90 of $experiment at p<0.1, $only_final95 of $experiment at p<0.05) \n",
        $experiment, $finding95, $finding90;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment