Skip to content

Instantly share code, notes, and snippets.

@neenhouse
Last active January 19, 2024 15:18
Show Gist options
  • Save neenhouse/758265e2ce62038060cb0e5950f26955 to your computer and use it in GitHub Desktop.
Save neenhouse/758265e2ce62038060cb0e5950f26955 to your computer and use it in GitHub Desktop.
Simple AB Testing
import {describe, beforeEach, test, expect, jest} from '@jest/globals';
import {v4 as uuidv4} from 'uuid';
import {calculateBucket} from '../ab-testing';
/**
 * Produces a random visitor identifier for the tests: a freshly generated
 * v4 UUID with every hyphen stripped out.
 */
function generateVisitorUuid(): string {
  const hyphenated = uuidv4();
  return hyphenated.split('-').join('');
}
/** Number of visitors that landed on each possible outcome of `calculateBucket`. */
interface BucketTally {
  control: number;
  variant: number;
  unsampled: number;
}

/**
 * Runs `calculateBucket` for `runs` freshly generated visitor uuids and counts the outcomes.
 *
 * `sampleRate` and `salt` are forwarded as given; leaving them undefined lets
 * `calculateBucket` apply its own defaults.
 *
 * @throws Error when `calculateBucket` yields anything other than 'control', 'variant' or false,
 * so an unexpected result fails the test loudly instead of being silently counted.
 */
async function tallyBuckets(
  runs: number,
  sampleRate?: number,
  salt?: string,
): Promise<BucketTally> {
  const tally: BucketTally = {control: 0, variant: 0, unsampled: 0};
  for (let i = 0; i < runs; i++) {
    // Awaited one at a time so we never hold `runs` pending digests in flight at once.
    const result = await calculateBucket(generateVisitorUuid(), sampleRate, salt);
    if (result === 'control') {
      tally.control++;
    } else if (result === 'variant') {
      tally.variant++;
    } else if (result === false) {
      tally.unsampled++;
    } else {
      throw new Error(`Unexpected bucketing result: ${String(result)}`);
    }
  }
  return tally;
}

describe('ab-testing', () => {
  beforeEach(() => {
    jest.resetModules();
  });

  test('Buckets 100,000 visitorUuids between control and variant with less than 1% difference', async () => {
    const runs = 100000;
    const tally = await tallyBuckets(runs);

    const difference = Math.abs(tally.control - tally.variant) / runs;
    expect(difference).toBeLessThan(0.01);
  });

  test('Samples ~10,000 / 100,000 (10%)', async () => {
    const runs = 100000;
    const tally = await tallyBuckets(runs, 0.1, 'salt');

    // Expect about 90,000 non-bucket decisions (90%) with less than 1% difference
    expect(Math.abs(0.9 - tally.unsampled / runs)).toBeLessThan(0.01);
    // Expect ~5,000 control buckets (5%) with less than 1% difference
    expect(Math.abs(0.05 - tally.control / runs)).toBeLessThan(0.01);
    // Expect ~5,000 variant buckets (5%) with less than 1% difference
    expect(Math.abs(0.05 - tally.variant / runs)).toBeLessThan(0.01);
    // Expect less than 1% difference between control / variant
    const difference = Math.abs(tally.control - tally.variant) / runs;
    expect(difference).toBeLessThan(0.01);
  });

  test('Samples ~1,000 / 100,000 (1%)', async () => {
    const runs = 100000;
    // At a 1% sample rate the expected control / variant shares are only 0.5% each, so a 1%
    // tolerance would accept almost any split (including nearly everyone in a single bucket).
    // 0.2% is still more than six standard deviations of the binomial noise at this run count,
    // so the test stays stable while actually constraining the result.
    const tolerance = 0.002;
    const tally = await tallyBuckets(runs, 0.01, 'salt');

    // Expect about 99,000 non-bucket decisions (99%)
    expect(Math.abs(0.99 - tally.unsampled / runs)).toBeLessThan(tolerance);
    // Expect ~500 control buckets (0.5%)
    expect(Math.abs(0.005 - tally.control / runs)).toBeLessThan(tolerance);
    // Expect ~500 variant buckets (0.5%)
    expect(Math.abs(0.005 - tally.variant / runs)).toBeLessThan(tolerance);
    // Expect control and variant to stay close to each other
    const difference = Math.abs(tally.control - tally.variant) / runs;
    expect(difference).toBeLessThan(tolerance);
  });
});
// Largest value of an unsigned 32-bit hash section; dividing by it maps a section onto 0 - 1.
const UINT32_MAX = 0xffffffff;

/**
 * calculateBucket()
 * Deterministically assigns a visitor to the `control` or `variant` bucket of an A/B test.
 *
 * Uses `crypto.subtle.digest` to create an MD5 hash based on visitorUuid + salt.
 * The 16-byte MD5 digest (32 hex chars) is read as 4 unsigned 32-bit big-endian sections, and a
 * section is divided by 0xFFFFFFFF to produce a value ranging from 0 - 1.
 * - [first section]  - used to calculate sampling
 * - [second section] - used to calculate bucket
 * - [third section]  - not used
 * - [fourth section] - not used
 * Each section can support up to 4 billion values, which is sufficient for AB testing.
 *
 * NOTE: MD5 is not one of the digest algorithms defined by the Web Crypto specification, so this
 * relies on a runtime whose `crypto.subtle.digest` offers it as an extension.
 *
 * @param visitorUuid - 32 character uuid without hyphens
 * @param sampleRate - fraction of visitors to include, from 0 - 1. A rate of 1 or more includes
 * everyone; a rate that is not greater than 0 (zero, negative or NaN) includes no one.
 * @param salt - a string value to randomize bucketing strategy
 * @returns 'control' or 'variant' for a sampled visitor, or `false` when the visitor falls
 * outside of the sample
 */
export async function calculateBucket(
  visitorUuid: string,
  sampleRate = 1,
  salt = 'udemy',
): Promise<string | boolean> {
  // A rate that is not strictly positive can never admit a visitor. Deciding this up front keeps
  // a rate of 0 from admitting the visitor whose sampling section happens to be exactly 0, and
  // makes an invalid (NaN) rate fail closed instead of sampling everyone.
  if (!(sampleRate > 0)) {
    return false;
  }

  // Hash the visitorUuid together with the salt
  const input = new TextEncoder().encode(`${visitorUuid}${salt}`);
  const md5Digest = await crypto.subtle.digest({name: 'md5'}, input);

  // Read the sections straight from the digest bytes. A big-endian uint32 at byte offset N is
  // the same number as parsing the corresponding 8 hex characters of the MD5 hash.
  const sections = new DataView(md5Digest);

  // If sample rate is less than 100%, we need to calculate if we are sampling
  if (sampleRate < 1) {
    const sampleValue = sections.getUint32(0) / UINT32_MAX;
    // If we find a higher value, we are outside of sample size
    if (sampleValue > sampleRate) {
      return false;
    }
  }

  // Use the second section to calculate the bucketing decision
  const bucketValue = sections.getUint32(4) / UINT32_MAX;

  // 3rd and 4th sections (byte offsets 8 and 12) are reserved for other functionality needing a
  // pseudorandom generated range.
  return bucketValue > 0.5 ? 'variant' : 'control';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment