Last active
July 2, 2021 07:51
-
-
Save bellbind/801c5de20aa4f310062363f3f95a099e to your computer and use it in GitHub Desktop.
[javascript] [statistics](both welch's and student's) t-test implementations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
// binomial coefficient nCk | |
function binom(n, k) { | |
k = n - k < k ? n - k : k; | |
const b = [1].concat(Array.from(Array(k), _ => 0)); | |
for (let i = 1; i <= n; i++) { | |
for (let j = i < k ? i : k; j > 0; j--) b[j] += b[j - 1]; | |
} | |
return b[k]; | |
} | |
// distribution function of binomial distribution | |
function dBinom(k, n, p) { | |
return binom(n, k) * (p ** k) * ((1 - p) ** (n - k)); | |
} | |
//*/ | |
// lgamma (g=7) at: https://mrob.com/pub/ries/lanczos-gamma.html | |
const Pg0 = 0.99999999999980993227684700473478; | |
const Pg = [ | |
676.520368121885098567009190444019, | |
-1259.13921672240287047156078755283, | |
771.3234287776530788486528258894, | |
-176.61502916214059906584551354, | |
12.507343278686904814458936853, | |
-0.13857109526572011689554707, | |
9.984369578019570859563e-6, | |
1.50563273514931155834e-7, | |
]; | |
function lgamma(x) { | |
const {log, PI, sin, sqrt} = Math; | |
if (x < 0.5) return log(PI / sin(PI * x)) - lgamma(1 - x); | |
const a = Pg.reduce((r, p, i) => r + p / (x + i), Pg0); | |
const t = x + Pg.length - 1.5; | |
return log(sqrt(2 * PI)) + log(t) * (x - 0.5) - t + log(a); | |
} | |
function dBinom(k, n, p) { | |
// dBinom used log for larger p ** n < 2 ** -1024 case | |
const logBinom = lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1); | |
const logProb = Math.log(p) * k + Math.log(1 - p) * (n - k); | |
return Math.exp(logBinom + logProb); | |
} | |
// cumulative distribution function of binomial distribution | |
function cdBinom(k, n, p) { | |
return Array.from(Array(k + 1), (_, i) => dBinom(i, n , p)). | |
reduce((r, v) => r + v, 0); | |
} | |
// binomial test: probability of x-success counts in n-case (success rate p) | |
// - pLess: total of <= x success case | |
// - pGreat: total of x >= success case | |
// - pBoth: <= x success or x <= faulure case | |
function binomTest(x, n, p = 0.5) { | |
const pLess = cdBinom(x, n, p); | |
const pGreat = 1 - cdBinom(x - 1, n, p); | |
const pBoth = (_ => { | |
// from R's src/library/stats/R/binom.test.R | |
const m = n * p; | |
if (x === m) return 1; | |
const dp = dBinom(x, n, p); | |
const ns = Array.from(Array(n + 1), (_, i) => i); | |
if (x < m) { | |
const y = ns.slice(Math.ceil(m)). | |
filter(i => dBinom(i, n, p) <= dp).length; | |
return pLess + (1 - cdBinom(n - y, n, p)); | |
} else { | |
const y = ns.slice(0, Math.ceil(m)). | |
filter(i => dBinom(i, n, p) <= dp).length; | |
return cdBinom(y - 1, n, p) + pGreat; | |
} | |
})(); | |
return {pLess, pGreat, pBoth, x, n, p}; | |
} | |
console.log(binomTest(1, 10)); | |
console.log(binomTest(1, 10, 0.4)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// utils | |
function range(n) { | |
return [...Array(n).keys()]; | |
} | |
function frac(n) { | |
let r = 1; | |
for (let i = n; i > 0; i -= 1) r *= i; | |
return r; | |
} | |
function dfrac(n) { | |
let r = 1; | |
for (let i = n; i > 0; i -= 2) r *= i; | |
return r; | |
} | |
function binom(n, k) { | |
k = n - k < k ? n - k : k; | |
const b = [1].concat(Array.from(Array(k), _ => 0)); | |
for (let i = 1; i <= n; i++) { | |
for (let j = i < k ? i : k; j > 0; j--) b[j] += b[j - 1]; | |
} | |
return b[k]; | |
} | |
// matrix | |
function diag(ds) { | |
return range(ds.length).map(i => ds.map((v, j) => i === j ? v : 0)); | |
} | |
function genMatrix(g, n) { | |
return range(n).map(i => range(n).map(j => g(i + 1, j + 1))); | |
} | |
function matMul(A, B) { | |
const n = A.length, m = B[0].length; | |
return A.map((ai, i) => range(m).map( | |
j => ai.reduce((s, a, k) => s + a * B[k][j], 0))); | |
} | |
function tr(M) { | |
const n = M.length, m = M[0].length; | |
return range(m).map(i => range(n).map(j => M[j][i])); | |
} | |
// impl: https://mrob.com/pub/ries/lanczos-gamma.html | |
function Dc(n) { | |
return diag(range(n + 1).map(k => 2 * dfrac(2 * k - 1))); | |
} | |
function cElem(row, col) { | |
if (col > row) return 0; | |
if (row === 1 && col === 1) return 0.5; | |
return ((-1) ** (row + col)) * | |
(4 ** (col - 1)) * (row - 1) * frac(row + col - 3) / | |
frac(row - col) / frac(2 * col - 2); | |
} | |
function C(n) { | |
return genMatrix(cElem, n + 1); | |
} | |
function f(g, n) { | |
return Math.sqrt(2) * (Math.E / (2 * (n + g) + 1)) ** (n + 0.5); | |
} | |
function Dr(k) { | |
return diag([1].concat(range(k).map( | |
n => -frac(2 * n + 2) / (2 * frac(n) * frac(n + 1))))); | |
} | |
function bElem(row, col) { | |
if (row === 1) return 1; | |
if (row > col) return 0; | |
return ((-1)**(col - row)) * binom(col + row - 3, 2 * row - 3); | |
} | |
function B(k) { | |
return genMatrix(bElem, k + 1); | |
} | |
function lanczos(g, n) { | |
const DB = matMul(Dr(n), B(n)); | |
const CD = matMul(C(n), Dc(n)); | |
const M = matMul(DB, CD); | |
const F = tr([range(n + 1).map(k => f(g, k))]); | |
return matMul(M, F); | |
} | |
function tlgl(g, n) { | |
return lanczos(g, n - 1).map(v => v * Math.exp(g) / Math.sqrt(2 * Math.PI)); | |
} | |
// result: (compute twice higher float precision: float128 for double) | |
console.log("lanczos coeff g=5,n=7:", tlgl(5, 7)); | |
console.log("lanczos coeff g=7,n=9:", tlgl(7, 9)); | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Welch t-test: ported from C# code | |
// - https://msdn.microsoft.com/ja-jp/magazine/mt620016.aspx | |
// ACM Algorithm #209 Gauss | |
const P0 = [ | |
+0.000124818987, | |
-0.001075204047, | |
+0.005198775019, | |
-0.019198292004, | |
+0.059054035642, | |
-0.151968751364, | |
+0.319152932694, | |
-0.531923007300, | |
+0.797884560593, | |
]; | |
const P1 = [ | |
-0.000045255659, | |
+0.000152529290, | |
-0.000019538132, | |
-0.000676904986, | |
+0.001390604284, | |
-0.000794620820, | |
-0.002034254874, | |
+0.006549791214, | |
-0.010557625006, | |
+0.011630447319, | |
-0.009279453341, | |
+0.005353579108, | |
-0.002141268741, | |
+0.000535310849, | |
+0.999936657524, | |
]; | |
function pol(x, c) { | |
return c.reduce((r, c) => r * x + c, 0); | |
} | |
// integral amount of ND(mean=0, SD=1) returns between 0 and 1 | |
// - gauss(z) = (1 + erf(z * 2**-0.5))/2 | |
function gauss(z) { | |
if (z === 0) return 0.5; | |
const y = Math.abs(z) / 2; | |
const p = y >= 3 ? 1 : y < 1 ? pol(y * y, P0) * y * 2 : pol(y - 2, P1); | |
return z > 0 ? (1 + p) / 2 : (1 - p) / 2; | |
} | |
//console.log(norm(-3)); // ~~ 0.0013 | |
//console.log(norm(-2)); // ~~ 0.023 | |
//console.log(norm(-1)); // ~~ 0.16 | |
// ACM Algorithm #395 (student t-distribution: df as double) | |
// t-dist : distribution for average of (df+1)-samples from ND(mean=0, SD=1) | |
// student(t, df) returns | |
// integral probability of both side area (< -t) and (t <) of t-dist(df) | |
const Ps = [-0.4, -3.3, -24, -85.5]; | |
function student(t, df) { | |
const a = df - 0.5; | |
const b = 48 * a * a; | |
const y0 = (t * t) / df; | |
const y = a * (y0 > 1e-6 ? Math.log(y0 + 1) : y0); | |
const s = (pol(y, Ps) / (0.8 * y * y + 100 + b) + y + 3) / b + 1; | |
return 2 * gauss(-s * (y ** 0.5)); | |
} | |
//console.log(student(1, 1000)); // ~~ 0.32 | |
//console.log(student(2, 1000)); // ~~ 0.046 | |
//console.log(student(3, 1000)); // ~~ 0.0027 | |
// welch's t-test | |
function tTest(x, y) { | |
console.assert(x.length > 1 && y.length > 1); | |
const nX = x.length, nY = y.length, nX1 = nX - 1, nY1 = nY - 1; | |
const meanX = x.reduce((r, v) => r + v, 0) / nX; | |
const meanY = y.reduce((r, v) => r + v, 0) / nY; | |
const varX = x.reduce((r, v) => r + (v - meanX) ** 2, 0) / nX1; | |
const varY = y.reduce((r, v) => r + (v - meanY) ** 2, 0) / nY1; | |
// see: t and nu of https://en.wikipedia.org/wiki/Welch%27s_t-test | |
const avX = varX / nX, avY = varY / nY; | |
const t = (meanX - meanY) / ((avX + avY) ** 0.5); | |
const df = ((avX + avY) ** 2) / ((avX ** 2) / nX1 + (avY ** 2) / nY1); | |
const p = student(t, df); | |
return {p, t, df, meanX, meanY}; | |
} | |
// example sets | |
const x = [88, 77, 78, 85, 90, 82, 88, 98, 90]; | |
const y = [81, 72, 67, 81, 71, 70, 82, 81]; | |
console.log(tTest(x, y)); | |
//{ p: 0.003793077554352986, | |
// t: 3.4232952676183435, | |
// df: 14.936596725791068, | |
// meanX: 86.22222222222223, | |
// meanY: 75.625 } | |
// (0.3% when x and y are from same population) | |
// student t-test | |
function tTest0(x, y) { | |
console.assert(x.length === y.length && x.length > 1); | |
const n = x.length, n1 = n - 1; | |
const meanX = x.reduce((r, v) => r + v, 0) / n; | |
const meanY = y.reduce((r, v) => r + v, 0) / n; | |
const varX = x.reduce((r, v) => r + (v - meanX) ** 2, 0) / n1; | |
const varY = y.reduce((r, v) => r + (v - meanY) ** 2, 0) / n1; | |
// see: https://en.wikipedia.org/wiki/Student%27s_t-test#Equal_sample_sizes,_equal_variance | |
const t = (meanX - meanY) / (((varX + varY) / n) ** 0.5); | |
const df = n1 * 2; // as 2n-2 | |
const p = student(t, df); | |
return {p, t, df, meanX, meanY}; | |
} | |
// samples from | |
// - http://www.obihiro.ac.jp/~masuday/resources/stat/r_t-test01.html | |
const x0 = [57, 120, 101, 137, 119, 117, 104, 73, 53, 68, 118]; | |
const y0 = [89, 30, 82, 50, 39, 22, 57, 32, 96, 31, 88]; | |
console.log(tTest0(x0, y0)); | |
//{ p: 0.003000142155019314, | |
// t: 3.376406863007222, | |
// df: 20, | |
// meanX: 97, | |
// meanY: 56 } | |
// (0.3% when x and y are from same population) | |
// paired t-test | |
function tPairedTest(x, y) { | |
console.assert(x.length === y.length && x.length > 1); | |
const n = x.length, n1 = n - 1; | |
const d = x.map((v, i) => v - y[i]); | |
const meanD = d.reduce((r, v) => r + v, 0) / n; | |
const varD = d.reduce((r, v) => r + (v - meanD) ** 2, 0) / n1; | |
const t = meanD / ((varD / n) ** 0.5); | |
const df = n1; | |
const p = student(t, df); | |
return {p, t, df, meanD, varD}; | |
} | |
// from https://bellcurve.jp/statistics/course/9453.html | |
const x1 = [180, 130, 165, 155, 140]; | |
const y1 = [150, 135, 145, 150, 140]; | |
console.log(tPairedTest(x1, y1)); | |
//{ p: 0.19982695443268672, | |
// t: 1.5339299776947408, | |
// df: 4, | |
// meanD: 10, | |
// varD: 212.5 } | |
//(19% when x1 and y1are from same population. | |
// usually "improved from x to y (population changed)" rejected with p > 5%) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
// gamma function just for natural number df of fisher(x, df1, df2) | |
function gamma(x) { | |
if (x === -0.5) return -2 * Math.PI ** 0.5; | |
if (x === 0.5) return Math.PI ** 0.5; | |
if (x === 1) return 1; | |
return (x - 1) * gamma(x - 1); | |
} | |
//*/ | |
//* | |
// gamma function (Lanczos method) | |
// - https://en.wikipedia.org/wiki/Lanczos_approximation | |
const Pg0 = 0.99999999999980993227684700473478; | |
const Pg = [ | |
676.520368121885098567009190444019, | |
-1259.13921672240287047156078755283, | |
771.3234287776530788486528258894, | |
-176.61502916214059906584551354, | |
12.507343278686904814458936853, | |
-0.13857109526572011689554707, | |
9.984369578019570859563e-6, | |
1.50563273514931155834e-7, | |
]; | |
function gamma(x) { | |
const {PI, sin, sqrt} = Math; | |
if (x < 0.5) return PI / (sin(PI * x) * gamma(1 - x)); | |
const a = Pg.reduce((r, p, i) => r + p / (x + i), Pg0); | |
const t = x + Pg.length - 1.5; | |
return sqrt(2 * PI) * (t ** (x - 0.5)) * Math.exp(-t) * a; | |
} | |
function lgamma(x) { | |
const {log, PI, sin, sqrt} = Math; | |
if (x < 0.5) return log(PI / sin(PI * x)) - lgamma(1 - x); | |
const a = Pg.reduce((r, p, i) => r + p / (x + i), Pg0); | |
const t = x + Pg.length - 1.5; | |
return log(sqrt(2 * PI)) + log(t) * (x - 0.5) - t + log(a); | |
} | |
//*/ | |
function beta(a, b) { | |
return gamma(a) * gamma(b) / gamma(a + b); | |
} | |
function lbeta(a, b) { | |
return lgamma(a) + lgamma(b) - lgamma(a + b); | |
} | |
/* | |
function incbeta(x, a, b) { | |
console.assert(Number.isInteger(a * 2) && Number.isInteger(b * 2)); | |
if (Number.isInteger(a) && !Number.isInteger(b)) | |
return 1 - incbeta(1 - x, b, a); | |
if (b === 1) return x ** a; | |
if (a === 1) return 1 - (1 - x) ** b; | |
if (a === 0.5 && b === 0.5) return Math.asin(x ** 0.5) * 2 / Math.PI; | |
const [A, B, s] = a === 0.5 && b > 1 ? | |
[a, b - 1, 1 / (b - 1)] : [a - 1, b, -1 / (a - 1)]; | |
return incbeta(x, A, B) + s * (x ** A) * ((1 - x) ** B) / beta(A, B); | |
} | |
//*/ | |
//* | |
// Incomplete Beta : https://github.com/codeplea/incbeta/blob/master/incbeta.c | |
function incbeta(x, a, b) { | |
if (x < 0 || x > 1) return Infinity; | |
if (x > (a + 1) / (a + b + 2)) return 1 - incbeta(1 - x, b, a); | |
const {exp, log, abs} = Math; | |
const lbetaAB = lbeta(a, b); | |
const front = exp(log(x) * a + log(1 - x) * b - lbetaAB - log(a)); | |
// == (x^a)((1-x)^b) / (beta(a, b)*a) | |
const TINY = 1e-30, STOP = 1e-8; | |
let f = 1, c = 1, d = 0; | |
for (let i = 0; i <= 200; i++) { | |
const m = i >> 1; | |
const n = i === 0 ? 1 : i % 2 === 0 ? | |
m * (b - m) * x / ((a + 2 * m - 1) * (a + 2 * m)) : | |
-(a + m) * (a + b + m) * x / ((a + 2 * m) * (a + 2 * m + 1)); | |
d = 1 + n * d; | |
if (abs(d) < TINY) d = TINY; | |
d = 1 / d; | |
c = 1 + n / c; | |
if (abs(c) < TINY) c = TINY; | |
const cd = c * d; | |
f *= cd; | |
if (abs(1 - cd) < STOP) return front * (f - 1); | |
} | |
return Infinity; | |
} | |
//*/ | |
// cumulative distribution function of F-distribution | |
function fisher(x, df1, df2) { | |
return incbeta(df1 * x / (df1 * x + df2), df1 / 2, df2 / 2); | |
} | |
// variance equality test (probalibity pBoth when x and y have same variances) | |
function varTest(x, y) { | |
const nX = x.length, nY = y.length, nX1 = nX - 1, nY1 = nY - 1; | |
const meanX = x.reduce((r, v) => r + v, 0) / nX; | |
const meanY = y.reduce((r, v) => r + v, 0) / nY; | |
const varX = x.reduce((r, v) => r + (v - meanX) ** 2, 0) / nX1; | |
const varY = y.reduce((r, v) => r + (v - meanY) ** 2, 0) / nY1; | |
const s = varX / varY, dfX = nX1, dfY = nY1; | |
const pLess = fisher(s, dfX, dfY); | |
const pGreater = 1 - pLess; | |
const pBoth = 2 * Math.min(pGreater, pLess); | |
return {pLess, pGreater, pBoth, s, dfX, dfY}; | |
} | |
// from http://data-science.gr.jp/implementation/ist_r_f_test.html | |
const x = [301, 311, 325, 291, 388, 412, 325, 361, 287]; | |
const y = [197, 180, 247, 260, 247, 199, 179, 134, 163, 200]; | |
// from https://stats.biopapyrus.jp/stats/f-test.html | |
//const x = [10, 23, 19, 14, 41, 33, 36, 31, 50]; | |
//const y = [14, 84, 44, 11, 36, 71, 34, 54, 61]; | |
console.log(varTest(x, y)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey, in terms of licensing is it OK if I use this code :)?