Skip to content

Instantly share code, notes, and snippets.

@bellbind
Last active July 2, 2021 07:51
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save bellbind/801c5de20aa4f310062363f3f95a099e to your computer and use it in GitHub Desktop.
Save bellbind/801c5de20aa4f310062363f3f95a099e to your computer and use it in GitHub Desktop.
[javascript] [statistics](both welch's and student's) t-test implementations
/*
// binomial coefficient nCk
function binom(n, k) {
k = n - k < k ? n - k : k;
const b = [1].concat(Array.from(Array(k), _ => 0));
for (let i = 1; i <= n; i++) {
for (let j = i < k ? i : k; j > 0; j--) b[j] += b[j - 1];
}
return b[k];
}
// distribution function of binomial distribution
function dBinom(k, n, p) {
return binom(n, k) * (p ** k) * ((1 - p) ** (n - k));
}
//*/
// lgamma (g=7) at: https://mrob.com/pub/ries/lanczos-gamma.html
const Pg0 = 0.99999999999980993227684700473478;
const Pg = [
676.520368121885098567009190444019,
-1259.13921672240287047156078755283,
771.3234287776530788486528258894,
-176.61502916214059906584551354,
12.507343278686904814458936853,
-0.13857109526572011689554707,
9.984369578019570859563e-6,
1.50563273514931155834e-7,
];
function lgamma(x) {
const {log, PI, sin, sqrt} = Math;
if (x < 0.5) return log(PI / sin(PI * x)) - lgamma(1 - x);
const a = Pg.reduce((r, p, i) => r + p / (x + i), Pg0);
const t = x + Pg.length - 1.5;
return log(sqrt(2 * PI)) + log(t) * (x - 0.5) - t + log(a);
}
function dBinom(k, n, p) {
// dBinom used log for larger p ** n < 2 ** -1024 case
const logBinom = lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1);
const logProb = Math.log(p) * k + Math.log(1 - p) * (n - k);
return Math.exp(logBinom + logProb);
}
// cumulative distribution function of binomial distribution
function cdBinom(k, n, p) {
return Array.from(Array(k + 1), (_, i) => dBinom(i, n , p)).
reduce((r, v) => r + v, 0);
}
// binomial test: probability of x-success counts in n-case (success rate p)
// - pLess: total of <= x success case
// - pGreat: total of x >= success case
// - pBoth: <= x success or x <= faulure case
function binomTest(x, n, p = 0.5) {
const pLess = cdBinom(x, n, p);
const pGreat = 1 - cdBinom(x - 1, n, p);
const pBoth = (_ => {
// from R's src/library/stats/R/binom.test.R
const m = n * p;
if (x === m) return 1;
const dp = dBinom(x, n, p);
const ns = Array.from(Array(n + 1), (_, i) => i);
if (x < m) {
const y = ns.slice(Math.ceil(m)).
filter(i => dBinom(i, n, p) <= dp).length;
return pLess + (1 - cdBinom(n - y, n, p));
} else {
const y = ns.slice(0, Math.ceil(m)).
filter(i => dBinom(i, n, p) <= dp).length;
return cdBinom(y - 1, n, p) + pGreat;
}
})();
return {pLess, pGreat, pBoth, x, n, p};
}
console.log(binomTest(1, 10));
console.log(binomTest(1, 10, 0.4));
// utils
function range(n) {
return [...Array(n).keys()];
}
function frac(n) {
let r = 1;
for (let i = n; i > 0; i -= 1) r *= i;
return r;
}
function dfrac(n) {
let r = 1;
for (let i = n; i > 0; i -= 2) r *= i;
return r;
}
function binom(n, k) {
k = n - k < k ? n - k : k;
const b = [1].concat(Array.from(Array(k), _ => 0));
for (let i = 1; i <= n; i++) {
for (let j = i < k ? i : k; j > 0; j--) b[j] += b[j - 1];
}
return b[k];
}
// matrix
function diag(ds) {
return range(ds.length).map(i => ds.map((v, j) => i === j ? v : 0));
}
function genMatrix(g, n) {
return range(n).map(i => range(n).map(j => g(i + 1, j + 1)));
}
function matMul(A, B) {
const n = A.length, m = B[0].length;
return A.map((ai, i) => range(m).map(
j => ai.reduce((s, a, k) => s + a * B[k][j], 0)));
}
function tr(M) {
const n = M.length, m = M[0].length;
return range(m).map(i => range(n).map(j => M[j][i]));
}
// impl: https://mrob.com/pub/ries/lanczos-gamma.html
function Dc(n) {
return diag(range(n + 1).map(k => 2 * dfrac(2 * k - 1)));
}
function cElem(row, col) {
if (col > row) return 0;
if (row === 1 && col === 1) return 0.5;
return ((-1) ** (row + col)) *
(4 ** (col - 1)) * (row - 1) * frac(row + col - 3) /
frac(row - col) / frac(2 * col - 2);
}
function C(n) {
return genMatrix(cElem, n + 1);
}
function f(g, n) {
return Math.sqrt(2) * (Math.E / (2 * (n + g) + 1)) ** (n + 0.5);
}
function Dr(k) {
return diag([1].concat(range(k).map(
n => -frac(2 * n + 2) / (2 * frac(n) * frac(n + 1)))));
}
function bElem(row, col) {
if (row === 1) return 1;
if (row > col) return 0;
return ((-1)**(col - row)) * binom(col + row - 3, 2 * row - 3);
}
function B(k) {
return genMatrix(bElem, k + 1);
}
function lanczos(g, n) {
const DB = matMul(Dr(n), B(n));
const CD = matMul(C(n), Dc(n));
const M = matMul(DB, CD);
const F = tr([range(n + 1).map(k => f(g, k))]);
return matMul(M, F);
}
function tlgl(g, n) {
return lanczos(g, n - 1).map(v => v * Math.exp(g) / Math.sqrt(2 * Math.PI));
}
// result: (compute twice higher float precision: float128 for double)
console.log("lanczos coeff g=5,n=7:", tlgl(5, 7));
console.log("lanczos coeff g=7,n=9:", tlgl(7, 9));
// Welch t-test: ported from C# code
// - https://msdn.microsoft.com/ja-jp/magazine/mt620016.aspx
// ACM Algorithm #209 Gauss
const P0 = [
+0.000124818987,
-0.001075204047,
+0.005198775019,
-0.019198292004,
+0.059054035642,
-0.151968751364,
+0.319152932694,
-0.531923007300,
+0.797884560593,
];
const P1 = [
-0.000045255659,
+0.000152529290,
-0.000019538132,
-0.000676904986,
+0.001390604284,
-0.000794620820,
-0.002034254874,
+0.006549791214,
-0.010557625006,
+0.011630447319,
-0.009279453341,
+0.005353579108,
-0.002141268741,
+0.000535310849,
+0.999936657524,
];
function pol(x, c) {
return c.reduce((r, c) => r * x + c, 0);
}
// integral amount of ND(mean=0, SD=1) returns between 0 and 1
// - gauss(z) = (1 + erf(z * 2**-0.5))/2
function gauss(z) {
if (z === 0) return 0.5;
const y = Math.abs(z) / 2;
const p = y >= 3 ? 1 : y < 1 ? pol(y * y, P0) * y * 2 : pol(y - 2, P1);
return z > 0 ? (1 + p) / 2 : (1 - p) / 2;
}
//console.log(norm(-3)); // ~~ 0.0013
//console.log(norm(-2)); // ~~ 0.023
//console.log(norm(-1)); // ~~ 0.16
// ACM Algorithm #395 (student t-distribution: df as double)
// t-dist : distribution for average of (df+1)-samples from ND(mean=0, SD=1)
// student(t, df) returns
// integral probability of both side area (< -t) and (t <) of t-dist(df)
const Ps = [-0.4, -3.3, -24, -85.5];
function student(t, df) {
const a = df - 0.5;
const b = 48 * a * a;
const y0 = (t * t) / df;
const y = a * (y0 > 1e-6 ? Math.log(y0 + 1) : y0);
const s = (pol(y, Ps) / (0.8 * y * y + 100 + b) + y + 3) / b + 1;
return 2 * gauss(-s * (y ** 0.5));
}
//console.log(student(1, 1000)); // ~~ 0.32
//console.log(student(2, 1000)); // ~~ 0.046
//console.log(student(3, 1000)); // ~~ 0.0027
// welch's t-test
function tTest(x, y) {
console.assert(x.length > 1 && y.length > 1);
const nX = x.length, nY = y.length, nX1 = nX - 1, nY1 = nY - 1;
const meanX = x.reduce((r, v) => r + v, 0) / nX;
const meanY = y.reduce((r, v) => r + v, 0) / nY;
const varX = x.reduce((r, v) => r + (v - meanX) ** 2, 0) / nX1;
const varY = y.reduce((r, v) => r + (v - meanY) ** 2, 0) / nY1;
// see: t and nu of https://en.wikipedia.org/wiki/Welch%27s_t-test
const avX = varX / nX, avY = varY / nY;
const t = (meanX - meanY) / ((avX + avY) ** 0.5);
const df = ((avX + avY) ** 2) / ((avX ** 2) / nX1 + (avY ** 2) / nY1);
const p = student(t, df);
return {p, t, df, meanX, meanY};
}
// example sets
const x = [88, 77, 78, 85, 90, 82, 88, 98, 90];
const y = [81, 72, 67, 81, 71, 70, 82, 81];
console.log(tTest(x, y));
//{ p: 0.003793077554352986,
// t: 3.4232952676183435,
// df: 14.936596725791068,
// meanX: 86.22222222222223,
// meanY: 75.625 }
// (0.3% when x and y are from same population)
// student t-test
function tTest0(x, y) {
console.assert(x.length === y.length && x.length > 1);
const n = x.length, n1 = n - 1;
const meanX = x.reduce((r, v) => r + v, 0) / n;
const meanY = y.reduce((r, v) => r + v, 0) / n;
const varX = x.reduce((r, v) => r + (v - meanX) ** 2, 0) / n1;
const varY = y.reduce((r, v) => r + (v - meanY) ** 2, 0) / n1;
// see: https://en.wikipedia.org/wiki/Student%27s_t-test#Equal_sample_sizes,_equal_variance
const t = (meanX - meanY) / (((varX + varY) / n) ** 0.5);
const df = n1 * 2; // as 2n-2
const p = student(t, df);
return {p, t, df, meanX, meanY};
}
// samples from
// - http://www.obihiro.ac.jp/~masuday/resources/stat/r_t-test01.html
const x0 = [57, 120, 101, 137, 119, 117, 104, 73, 53, 68, 118];
const y0 = [89, 30, 82, 50, 39, 22, 57, 32, 96, 31, 88];
console.log(tTest0(x0, y0));
//{ p: 0.003000142155019314,
// t: 3.376406863007222,
// df: 20,
// meanX: 97,
// meanY: 56 }
// (0.3% when x and y are from same population)
// paired t-test
function tPairedTest(x, y) {
console.assert(x.length === y.length && x.length > 1);
const n = x.length, n1 = n - 1;
const d = x.map((v, i) => v - y[i]);
const meanD = d.reduce((r, v) => r + v, 0) / n;
const varD = d.reduce((r, v) => r + (v - meanD) ** 2, 0) / n1;
const t = meanD / ((varD / n) ** 0.5);
const df = n1;
const p = student(t, df);
return {p, t, df, meanD, varD};
}
// from https://bellcurve.jp/statistics/course/9453.html
const x1 = [180, 130, 165, 155, 140];
const y1 = [150, 135, 145, 150, 140];
console.log(tPairedTest(x1, y1));
//{ p: 0.19982695443268672,
// t: 1.5339299776947408,
// df: 4,
// meanD: 10,
// varD: 212.5 }
//(19% when x1 and y1are from same population.
// usually "improved from x to y (population changed)" rejected with p > 5%)
/*
// gamma function just for natural number df of fisher(x, df1, df2)
function gamma(x) {
if (x === -0.5) return -2 * Math.PI ** 0.5;
if (x === 0.5) return Math.PI ** 0.5;
if (x === 1) return 1;
return (x - 1) * gamma(x - 1);
}
//*/
//*
// gamma function (Lanczos method)
// - https://en.wikipedia.org/wiki/Lanczos_approximation
const Pg0 = 0.99999999999980993227684700473478;
const Pg = [
676.520368121885098567009190444019,
-1259.13921672240287047156078755283,
771.3234287776530788486528258894,
-176.61502916214059906584551354,
12.507343278686904814458936853,
-0.13857109526572011689554707,
9.984369578019570859563e-6,
1.50563273514931155834e-7,
];
function gamma(x) {
const {PI, sin, sqrt} = Math;
if (x < 0.5) return PI / (sin(PI * x) * gamma(1 - x));
const a = Pg.reduce((r, p, i) => r + p / (x + i), Pg0);
const t = x + Pg.length - 1.5;
return sqrt(2 * PI) * (t ** (x - 0.5)) * Math.exp(-t) * a;
}
function lgamma(x) {
const {log, PI, sin, sqrt} = Math;
if (x < 0.5) return log(PI / sin(PI * x)) - lgamma(1 - x);
const a = Pg.reduce((r, p, i) => r + p / (x + i), Pg0);
const t = x + Pg.length - 1.5;
return log(sqrt(2 * PI)) + log(t) * (x - 0.5) - t + log(a);
}
//*/
function beta(a, b) {
return gamma(a) * gamma(b) / gamma(a + b);
}
function lbeta(a, b) {
return lgamma(a) + lgamma(b) - lgamma(a + b);
}
/*
function incbeta(x, a, b) {
console.assert(Number.isInteger(a * 2) && Number.isInteger(b * 2));
if (Number.isInteger(a) && !Number.isInteger(b))
return 1 - incbeta(1 - x, b, a);
if (b === 1) return x ** a;
if (a === 1) return 1 - (1 - x) ** b;
if (a === 0.5 && b === 0.5) return Math.asin(x ** 0.5) * 2 / Math.PI;
const [A, B, s] = a === 0.5 && b > 1 ?
[a, b - 1, 1 / (b - 1)] : [a - 1, b, -1 / (a - 1)];
return incbeta(x, A, B) + s * (x ** A) * ((1 - x) ** B) / beta(A, B);
}
//*/
//*
// Incomplete Beta : https://github.com/codeplea/incbeta/blob/master/incbeta.c
function incbeta(x, a, b) {
if (x < 0 || x > 1) return Infinity;
if (x > (a + 1) / (a + b + 2)) return 1 - incbeta(1 - x, b, a);
const {exp, log, abs} = Math;
const lbetaAB = lbeta(a, b);
const front = exp(log(x) * a + log(1 - x) * b - lbetaAB - log(a));
// == (x^a)((1-x)^b) / (beta(a, b)*a)
const TINY = 1e-30, STOP = 1e-8;
let f = 1, c = 1, d = 0;
for (let i = 0; i <= 200; i++) {
const m = i >> 1;
const n = i === 0 ? 1 : i % 2 === 0 ?
m * (b - m) * x / ((a + 2 * m - 1) * (a + 2 * m)) :
-(a + m) * (a + b + m) * x / ((a + 2 * m) * (a + 2 * m + 1));
d = 1 + n * d;
if (abs(d) < TINY) d = TINY;
d = 1 / d;
c = 1 + n / c;
if (abs(c) < TINY) c = TINY;
const cd = c * d;
f *= cd;
if (abs(1 - cd) < STOP) return front * (f - 1);
}
return Infinity;
}
//*/
// cumulative distribution function of F-distribution
function fisher(x, df1, df2) {
return incbeta(df1 * x / (df1 * x + df2), df1 / 2, df2 / 2);
}
// variance equality test (probalibity pBoth when x and y have same variances)
function varTest(x, y) {
const nX = x.length, nY = y.length, nX1 = nX - 1, nY1 = nY - 1;
const meanX = x.reduce((r, v) => r + v, 0) / nX;
const meanY = y.reduce((r, v) => r + v, 0) / nY;
const varX = x.reduce((r, v) => r + (v - meanX) ** 2, 0) / nX1;
const varY = y.reduce((r, v) => r + (v - meanY) ** 2, 0) / nY1;
const s = varX / varY, dfX = nX1, dfY = nY1;
const pLess = fisher(s, dfX, dfY);
const pGreater = 1 - pLess;
const pBoth = 2 * Math.min(pGreater, pLess);
return {pLess, pGreater, pBoth, s, dfX, dfY};
}
// from http://data-science.gr.jp/implementation/ist_r_f_test.html
const x = [301, 311, 325, 291, 388, 412, 325, 361, 287];
const y = [197, 180, 247, 260, 247, 199, 179, 134, 163, 200];
// from https://stats.biopapyrus.jp/stats/f-test.html
//const x = [10, 23, 19, 14, 41, 33, 36, 31, 50];
//const y = [14, 84, 44, 11, 36, 71, 34, 54, 61];
console.log(varTest(x, y));
@benjamingr
Copy link

Hey, in terms of licensing is it OK if I use this code :)?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment