Skip to content

Instantly share code, notes, and snippets.

@danmoseley
Last active December 30, 2022 19:59
Show Gist options
  • Save danmoseley/784a25839978425ae15d037f587ac76b to your computer and use it in GitHub Desktop.
Save danmoseley/784a25839978425ae15d037f587ac76b to your computer and use it in GitHub Desktop.
test random with chi squared
using System.Diagnostics;
using System.Linq;
using System.Numerics;
internal class Program
{
public static void Main(string[] args)
{
// Check for an off-by-one type error causing us to consistently not generate values
// across the whole of the requested range
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next));
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next, 0, 50));
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next, -25, 25));
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next, 0, int.MaxValue));
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next, int.MinValue, int.MaxValue));
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64));
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64, 0, 50L));
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64, -25L, 25L));
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64, 0, long.MaxValue));
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64, long.MinValue, 0));
ChiSquaredTest(GetBucketizedFloats(Random.Shared.NextSingle));
ChiSquaredTest(GetBucketizedFloats(Random.Shared.NextDouble));
// To illustrate it is sensitive to an off-by-one:
//
static Func<T, T, T> FilterOneValue<T>(Func<T, T, T> func, T reject) where T : IEqualityOperators<T, T, bool> =>
(T min, T maxValue) =>
{
T sample;
while (reject == (sample = func(min, maxValue)));
return sample;
};
Console.Write(ChiSquaredTest(GetBucketizedBinaryIntegers(FilterOneValue((min, maxExclusive) => Random.Shared.Next(min, maxExclusive), reject: 49), 0, 50)));
Console.WriteLine("done");
Console.ReadLine();
}
static int[] GetBucketizedFloats<T>(Func<T> func) where T : INumber<T>
{
int[] buckets = new int[50];
for (int i = 0; i < 100_000; i++)
{
T sample = func();
Debug.Assert(T.Zero <= sample && sample < T.CreateSaturating(1));
buckets[(int)(decimal.CreateSaturating(sample) * buckets.Length)]++;
}
return buckets;
}
static int[] GetBucketizedBinaryIntegers<T>(Func<T> func) where T : INumber<T>, IMinMaxValue<T>, IModulusOperators<T, T, T> =>
GetBucketizedBinaryIntegers<T>((_) => func(), T.MaxValue);
static int[] GetBucketizedBinaryIntegers<T>(Func<T, T> func, T maxExclusive) where T : INumber<T>, IMinMaxValue<T>, IModulusOperators<T, T, T> =>
GetBucketizedBinaryIntegers<T>((_, _) => func(maxExclusive), T.Zero, maxExclusive);
static int[] GetBucketizedBinaryIntegers<T>(Func<T, T, T> func, T min, T maxExclusive) where T : INumber<T>, IModulusOperators<T, T, T>
{
Debug.Assert(min < maxExclusive);
int[] buckets = new int[50];
// Range must be a multiple of bucket count, unless it is large enough to not matter
decimal range = decimal.CreateSaturating(maxExclusive) - decimal.CreateSaturating(min);
Debug.Assert(range > 100_000m || range % buckets.Length == 0);
for (int i = 0; i < 100_000; i++)
{
T sample = func(min, maxExclusive);
int index = (int)((decimal.CreateSaturating(sample) - decimal.CreateSaturating(min)) % buckets.Length);
buckets[index]++;
}
return buckets;
}
// Provided an array of 50 buckets each containing a count, returns true if there is a less than 1/10**8
// probability that the sample originated from a random distribution.
static bool ChiSquaredTest(int[] buckets)
{
// Critical value depends on bucket count.
Debug.Assert(buckets.Length == 50);
// We expect a perfectly uniform distribution.
double sum = 0;
for (int i = 0; i < buckets.Length; i++)
{
sum += buckets[i];
}
double expectedPerBucket = sum / buckets.Length;
// Calculate chi-square statistic = sum of (observed - expected)**2 / expected
double chiSquare = 0;
foreach (int bucket in buckets)
{
chiSquare += Math.Pow(bucket - expectedPerBucket, 2) / expectedPerBucket;
}
// This is a "Chi Squared Goodness of fit" problem, so degrees of freedom is (#buckets - 1) which in this case is 49.
// We choose a significance level such that the chance of a false positive is 1/10**8.
// Plugging these into Excel formula `=CHISQ.INV.RT(0.00000001,49)` gives 126.
// If Chi Squared is greater than 126, the sample is very unlikely to be derived from the uniform distribution.
Console.WriteLine($"{chiSquare}");
Debug.Assert(chiSquare <= 126);
return (chiSquare <= 126);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment