/repeat_testing.pl

## repeat_testing.pl
#!/usr/bin/perl

use strict;
use warnings;
use Statistics::Gtest;
use Statistics::Distributions;
use Data::Dumper;

#
# Just set up some basics
#

# How many variants and participants to include.
# Participants are assigned to variants at random, so $SAMPLE_SIZE is the
# average (not exact) number of participants per variant.
my $VARIANTS        = 2;
my $SAMPLE_SIZE     = 100_000;
my $PARTICIPANTS    = $SAMPLE_SIZE * $VARIANTS;

# Conversion rate of mock experiments, in percent (it is compared to rand(100))
my $CONVERSION_RATE = 3;

# Chi-square critical values that the G statistic is compared against.
# Each experiment is a $VARIANTS x 2 (hits/fails) table, which has
# $VARIANTS - 1 degrees of freedom, so the cutoffs are derived from the
# configuration instead of being hard-coded for the two-variant case
# (for two variants they come out as 2.7055 and 3.8415).
# Needs $VARIANTS >= 2; a single variant leaves nothing to compare.
my $GTEST_CUTOFF_90 = Statistics::Distributions::chisqrdistr($VARIANTS - 1, 0.10); # This means significance at .10
my $GTEST_CUTOFF_95 = Statistics::Distributions::chisqrdistr($VARIANTS - 1, 0.05); # This means significance at .05

# Number of mock experiments to run
my $EXPERIMENTS     = 1000;

# How many participants to do before starting checking
my $CHECK_CUTOFF    = 1000;


#
# Counts
#

# Experiments in which at least one interim check crossed the cutoff
my ($totals90, $totals95) = (0, 0);

# Experiments whose single end-of-run check crossed the cutoff
my ($only_final90, $only_final95) = (0, 0);

# Report the run parameters before the simulation starts.
# The trailing empty string makes join() end the output with a newline.
print join "\n",
    "Running false positive checks with these metrics;",
    "Sample size:     $SAMPLE_SIZE",
    "Variants:        $VARIANTS",
    "Conversion rate: $CONVERSION_RATE",
    "Experiments:     $EXPERIMENTS",
    "";


#
# This is where the experiments are run.
#

# Run $EXPERIMENTS mock A/A experiments.  Every variant converts at the same
# rate, so any "significant" G-test result is a false positive.  For each
# experiment we record both whether *any* interim check crossed a cutoff
# (repeated testing) and whether the single final check did.
for my $experiment (1..$EXPERIMENTS) {

    # One [hits, fails] row per variant: the table handed to the G-test
    my @variant;
    push @variant, [0, 0] for 1..$VARIANTS;

    # Number of interim checks in this experiment that crossed each cutoff
    my $finding90 = 0;
    my $finding95 = 0;

    #
    # One pretend experiment with $PARTICIPANTS participants and the same
    # $CONVERSION_RATE in each variant, randomly assigning each participant
    # to a variant at time seen
    #

    # Count from 1 so that exactly $PARTICIPANTS participants are simulated
    # and $participant is the number of participants seen so far.
    for my $participant (1..$PARTICIPANTS) {
        my $var = int rand( $VARIANTS );

        # rand(100) lies in [0, 100), so a strict "<" converts with a
        # probability of $CONVERSION_RATE percent.
        if (rand(100) < $CONVERSION_RATE) {
            $variant[$var]->[0]++;      # hits
        }
        else {
            $variant[$var]->[1]++;      # fails
        }

        # No peeking until $CHECK_CUTOFF participants have been seen
        next if $participant < $CHECK_CUTOFF;
#        next unless int rand(100) == 5;   # (disabled) check on only ~1% of participants

        my $g     = Statistics::Gtest->new(\@variant);
        my $float = $g->getG();

        $finding90++ if $float >= $GTEST_CUTOFF_90;
        $finding95++ if $float >= $GTEST_CUTOFF_95;
    }

    # The single check on the complete data.  Computed separately from the
    # interim checks so it is still available when $PARTICIPANTS is smaller
    # than $CHECK_CUTOFF and no interim check ran.
    my $g     = Statistics::Gtest->new(\@variant);
    my $float = $g->getG();

    $only_final90++ if $float >= $GTEST_CUTOFF_90;
    $only_final95++ if $float >= $GTEST_CUTOFF_95;

    # An experiment counts once, however many interim checks crossed the cutoff
    $totals90++ if $finding90;
    $totals95++ if $finding95;

    printf "Experiment %4s of $EXPERIMENTS: (%6s at p<.05) (%6s at p<0.10). Hitrate: $totals90 of $experiment at p<0.10, $totals95 of $experiment at p<0.05. (One check: $only_final90 of $experiment at p<0.1, $only_final95 of $experiment at p<0.05) \n",
        $experiment, $finding95, $finding90;

}
	#!/usr/bin/perl

	use strict;
	use warnings;
	use Statistics::Gtest;
	use Statistics::Distributions;
	use Data::Dumper;

	#
	# Just set up some basics
	#

	# How many variants and participants to include.
	# Participants are assigned to variants at random, so $SAMPLE_SIZE is the
	# average (not exact) number of participants per variant.
	my $VARIANTS = 2;
	my $SAMPLE_SIZE = 100_000;
	my $PARTICIPANTS = $SAMPLE_SIZE * $VARIANTS;

	# Conversion rate of mock experiments, in percent (it is compared to rand(100))
	my $CONVERSION_RATE = 3;

	# Chi-square critical values that the G statistic is compared against.
	# Each experiment is a $VARIANTS x 2 (hits/fails) table, which has
	# $VARIANTS - 1 degrees of freedom, so the cutoffs are derived from the
	# configuration instead of being hard-coded for the two-variant case
	# (for two variants they come out as 2.7055 and 3.8415).
	# Needs $VARIANTS >= 2; a single variant leaves nothing to compare.
	my $GTEST_CUTOFF_90 = Statistics::Distributions::chisqrdistr($VARIANTS - 1, 0.10); # This means significance at .10
	my $GTEST_CUTOFF_95 = Statistics::Distributions::chisqrdistr($VARIANTS - 1, 0.05); # This means significance at .05

	# Number of mock experiments to run
	my $EXPERIMENTS = 1000;

	# How many participants to do before starting checking
	my $CHECK_CUTOFF = 1000;


	#
	# Counts
	#

	# Experiments in which at least one interim check crossed the cutoff
	my ($totals90, $totals95) = (0, 0);

	# Experiments whose single end-of-run check crossed the cutoff
	my ($only_final90, $only_final95) = (0, 0);

	# Report the run parameters before the simulation starts.
	# Written as a list of strings rather than a heredoc: an indented "EOT"
	# line does not terminate a plain <<EOT heredoc, and every line of an
	# indented heredoc body would be printed with its leading tab.
	print
	    "Running false positive checks with these metrics;\n",
	    "Sample size: $SAMPLE_SIZE\n",
	    "Variants: $VARIANTS\n",
	    "Conversion rate: $CONVERSION_RATE\n",
	    "Experiments: $EXPERIMENTS\n";


	#
	# This is where the experiments are run.
	#

	# Run $EXPERIMENTS mock A/A experiments.  Every variant converts at the same
	# rate, so any "significant" G-test result is a false positive.  For each
	# experiment we record both whether *any* interim check crossed a cutoff
	# (repeated testing) and whether the single final check did.
	for my $experiment (1..$EXPERIMENTS) {

	    # One [hits, fails] row per variant: the table handed to the G-test
	    my @variant;
	    push @variant, [0, 0] for 1..$VARIANTS;

	    # Number of interim checks in this experiment that crossed each cutoff
	    my $finding90 = 0;
	    my $finding95 = 0;

	    #
	    # One pretend experiment with $PARTICIPANTS participants and the same
	    # $CONVERSION_RATE in each variant, randomly assigning each participant
	    # to a variant at time seen
	    #

	    # Count from 1 so that exactly $PARTICIPANTS participants are simulated
	    # and $participant is the number of participants seen so far.
	    for my $participant (1..$PARTICIPANTS) {
	        my $var = int rand( $VARIANTS );

	        # rand(100) lies in [0, 100), so a strict "<" converts with a
	        # probability of $CONVERSION_RATE percent.
	        if (rand(100) < $CONVERSION_RATE) {
	            $variant[$var]->[0]++; # hits
	        }
	        else {
	            $variant[$var]->[1]++; # fails
	        }

	        # No peeking until $CHECK_CUTOFF participants have been seen
	        next if $participant < $CHECK_CUTOFF;
	        # next unless int rand(100) == 5;   # (disabled) check on only ~1% of participants

	        my $g = Statistics::Gtest->new(\@variant);
	        my $float = $g->getG();

	        $finding90++ if $float >= $GTEST_CUTOFF_90;
	        $finding95++ if $float >= $GTEST_CUTOFF_95;
	    }

	    # The single check on the complete data.  Computed separately from the
	    # interim checks so it is still available when $PARTICIPANTS is smaller
	    # than $CHECK_CUTOFF and no interim check ran.
	    my $g = Statistics::Gtest->new(\@variant);
	    my $float = $g->getG();

	    $only_final90++ if $float >= $GTEST_CUTOFF_90;
	    $only_final95++ if $float >= $GTEST_CUTOFF_95;

	    # An experiment counts once, however many interim checks crossed the cutoff
	    $totals90++ if $finding90;
	    $totals95++ if $finding95;

	    printf "Experiment %4s of $EXPERIMENTS: (%6s at p<.05) (%6s at p<0.10). Hitrate: $totals90 of $experiment at p<0.10, $totals95 of $experiment at p<0.05. (One check: $only_final90 of $experiment at p<0.1, $only_final95 of $experiment at p<0.05) \n",
	        $experiment, $finding95, $finding90;

	}