A/B test calculations
################################################################################ | |
# A/B test calculations # | |
# # | |
# Based on Stats Engine # | |
# //pages.optimizely.com/rs/optimizely/images/stats_engine_technical_paper.pdf # | |
################################################################################ | |
################## | |
# Helper Methods # | |
################## | |
Z_MAX = 6 # Maxium +/- z value | |
# | |
# probability of normal z value | |
# Adapted from a polynomial approximation in: | |
# Ibbetson D, Algorithm 209 Collected Algorithms of the CACM 1963 p. 616 | |
# Note: This routine has six digit accuracy, so it is only useful for absolute | |
# z values <= 6. For z values > to 6.0, poz() returns 0.0 | |
# | |
# source: https://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html | |
# | |
poz = (z) -> | |
y = undefined | |
x = undefined | |
w = undefined | |
if z is 0.0 | |
x = 0.0 | |
else | |
y = 0.5 * Math.abs(z) | |
if y > Z_MAX * 0.5 | |
x = 1.0 | |
else if y < 1.0 | |
w = y * y | |
x = ((((((((0.000124818987 * w \ | |
- 0.001075204047) * w + 0.005198775019) * w \ | |
- 0.019198292004) * w + 0.059054035642) * w \ | |
- 0.151968751364) * w + 0.319152932694) * w \ | |
- 0.531923007300) * w + 0.797884560593) * y * 2.0 | |
else | |
y -= 2.0 | |
x = (((((((((((((-0.000045255659 * y \ | |
+ 0.000152529290) * y - 0.000019538132) * y \ | |
- 0.000676904986) * y + 0.001390604284) * y \ | |
- 0.000794620820) * y - 0.002034254874) * y \ | |
+ 0.006549791214) * y - 0.010557625006) * y \ | |
+ 0.011630447319) * y - 0.009279453341) * y \ | |
+ 0.005353579108) * y - 0.002141268741) * y \ | |
+ 0.000535310849) * y + 0.999936657524 | |
if z > 0.0 | |
return (x + 1.0) * 0.5 | |
else | |
return (1.0 - x) * 0.5 | |
# | |
# Compute critical normal z value to produce given p. | |
# We just do a bisection search for a value within CHI_EPSILON, | |
# relying on the monotonicity of pochisq(). | |
# | |
# source: https://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html | |
# | |
critz = (p) -> | |
Z_EPSILON = 0.000001 # Accuracy of z approximation | |
minz = -Z_MAX | |
maxz = Z_MAX | |
zval = 0.0 | |
pval = undefined | |
if p < 0.0 or p > 1.0 | |
return -1 | |
while maxz - minz > Z_EPSILON | |
pval = poz(zval) | |
if pval > p | |
maxz = zval | |
else | |
minz = zval | |
zval = (maxz + minz) * 0.5 | |
zval | |
proportionTrueNull = (results) -> | |
trueNull = _.sum results, ({isSignificant, xBar, yBar}) -> | |
if xBar > yBar and isSignificant | |
return 1 | |
else | |
return 0 | |
trueNull / results.length | |
############# | |
# Constants # | |
############# | |
tau = 0.5 # ???????? - free variable in significance | |
FDRthreshold = 0.10 | |
errorRate = 0.05 | |
data = | |
kpi1: | |
control: | |
rate: 0.2 | |
visitors: 10000 | |
var1: | |
rate: 0.22 | |
visitors: 10000 | |
kpi2: | |
control: | |
rate: 0.01 | |
visitors: 10000 | |
var1: | |
rate: 0.02 | |
visitors: 10000 | |
N = Object.keys(data.kpi1).length # number of a/b tests | |
####################### | |
# Conclusive Results # | |
# See (2) in paper # | |
####################### | |
conclusiveResults _.map data (data, kpi) -> | |
# You can imagine looking at more variables here too | |
xBar = kpi.control.rate | |
yBar = kpi.var1.rate | |
n = kpi.control.visitors + kpi.var1.visitors | |
thetaHat = yBar - xBar | |
alpha = errorRate | |
V = 2 * (xBar * (1 - xBar) + yBar * (1 - yBar)) / n | |
pHat = Math.sqrt( | |
(2 * Math.log(1 / alpha) - Math.log(V / (V + tau))) * | |
(V * (V + tau) / tau) | |
) | |
isSignificant = Math.abs(thetaHat) > pHat | |
{ | |
isSignificant | |
pHat | |
kpi | |
xBar | |
yBar | |
} | |
###################### | |
# Actionable Results # | |
# See (4) in paper # | |
###################### | |
FDRresults = _.map _.sortBy(conclusiveResults, 'pHat'), (result, index) -> | |
{pHat} = result | |
i = index + 1 | |
piZero = proportionTrueNull(conclusiveResults) | |
FDR = piZero * pHat / (i * N) | |
_.default {FDR}, result | |
# We can take action on these results! | |
significantResults = _.filter FDRresults, ({FDR}) -> | |
(1 - FDR) > FDRthreshold | |
######################## | |
# Confidence Intervals # | |
######################## | |
confidenceIntervals = _.map FDRresults, (result) -> | |
{xBar, yBar, isSignificant} = result | |
# Calculate std. deviation | |
mu = (xBar + yBar) / 2 | |
delta = Math.sqrt( | |
1 / 2 * | |
_.sum [xBar, yBar], (xi) -> | |
Math.pow(xi - mu, 2) | |
) | |
# See section 3.5 in paper | |
q = errorRate | |
m = significantResults.length | |
coverageLevel = if isSignificant | |
then (1 - q * m / N) | |
else (1 - q * (m + 1) / N) | |
z = critz(coverageLevel) | |
{ | |
low: yBar - z * delta / Math.sqrt(N) | |
high: yBar + z * delta / Math.sqrt(N) | |
} |
////////////////////////////////////////////////////////////////////////////////// | |
// A/B test calculations // | |
// // | |
// Based on Stats Engine // | |
// http://pages.optimizely.com/rs/optimizely/images/stats_engine_technical_paper.pdf // | |
////////////////////////////////////////////////////////////////////////////////// | |
var FDRresults, FDRthreshold, N, Z_MAX, confidenceIntervals, critz, data, errorRate, poz, proportionTrueNull, significantResults, tau; | |
Z_MAX = 6; | |
proportionTrueNull = function(results) { | |
var trueNull; | |
trueNull = _.sum(results, function(result) { | |
var isSignificant, xBar, yBar; | |
isSignificant = result.isSignificant, | |
xBar = result.xBar, | |
yBar = result.yBar; | |
if (xBar > yBar && isSignificant) { | |
return 1; | |
} else { | |
return 0; | |
} | |
}); | |
return trueNull / results.length; | |
}; | |
/////////////// | |
// Constants // | |
/////////////// | |
tau = 0.5; // ???????? - free variable in significance | |
FDRthreshold = 0.10; | |
errorRate = 0.05; | |
data = { | |
kpi1: { | |
control: { | |
rate: 0.2, | |
visitors: 10000 | |
}, | |
var1: { | |
rate: 0.22, | |
visitors: 10000 | |
} | |
}, | |
kpi2: { | |
control: { | |
rate: 0.01, | |
visitors: 10000 | |
}, | |
var1: { | |
rate: 0.02, | |
visitors: 10000 | |
} | |
} | |
}; | |
N = Object.keys(data.kpi1).length; // number of a/b tests | |
///////////////////////// | |
// Conclusive Results // | |
// See (2) in paper // | |
///////////////////////// | |
conclusiveResults = _.map(data(function(data, kpi) { | |
var V, alpha, isSignificant, n, pHat, thetaHat, xBar, yBar; | |
// You can imagine looking at more variables here too | |
xBar = kpi.control.rate; | |
yBar = kpi.var1.rate; | |
n = kpi.control.visitors + kpi.var1.visitors; | |
thetaHat = yBar - xBar; | |
alpha = errorRate; | |
V = 2 * (xBar * (1 - xBar) + yBar * (1 - yBar)) / n; | |
pHat = Math.sqrt( | |
(2 * Math.log(1 / alpha) - Math.log(V / (V + tau))) * | |
(V * (V + tau) / tau) | |
); | |
isSignificant = Math.abs(thetaHat) > pHat; | |
return { | |
isSignificant: isSignificant, | |
pHat: pHat, | |
kpi: kpi, | |
xBar: xBar, | |
yBar: yBar | |
}; | |
})); | |
//////////////////////// | |
// Actionable Results // | |
// See (4) in paper // | |
//////////////////////// | |
FDRresults = _.map(_.sortBy(conclusiveResults, 'pHat'), function(result, index) { | |
var FDR, i, pHat, piZero; | |
pHat = result.pHat; | |
i = index + 1; | |
piZero = proportionTrueNull(conclusiveResults); | |
FDR = piZero * pHat / (i * N); | |
return _.defaults({ | |
FDR: FDR | |
}, result); | |
}); | |
// We can take action on these results! | |
significantResults = _.filter(FDRresults, function(result) { | |
return (1 - result.FDR) > FDRthreshold; | |
}); | |
////////////////////////// | |
// Confidence Intervals // | |
////////////////////////// | |
confidenceIntervals = _.map(FDRresults, function(result) { | |
var coverageLevel, delta, isSignificant, m, mu, q, xBar, yBar, z; | |
xBar = result.xBar, | |
yBar = result.yBar, | |
isSignificant = result.isSignificant; | |
// Calculate std. deviation | |
mu = (xBar + yBar) / 2; | |
delta = Math.sqrt(1 / 2 * _.sum([xBar, yBar], function(xi) { | |
return Math.pow(xi - mu, 2); | |
})); | |
// See section 3.5 in paper | |
q = errorRate; | |
m = significantResults.length; | |
coverageLevel = isSignificant ? 1 - q * m / N : 1 - q * (m + 1) / N; | |
z = critz(coverageLevel); // convert coverageLevel to z-score | |
return { | |
low: yBar - z * delta / Math.sqrt(N), | |
high: yBar + z * delta / Math.sqrt(N) | |
}; | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment