Last active
December 31, 2020 09:42
-
-
Save aewallin/a8b93c0836c06b5ca34525900d97e4d6 to your computer and use it in GitHub Desktop.
Comparison between scipy and approximations for inverse chi-squared cumulative distribution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The code below relates to computing the inverse chi-squared cumulative distribution, | |
# used e.g. in Stable32 and allantools for computing confidence intervals. | |
# This blog post has the images produced by the code, and some comments | |
# http://www.anderswallin.net/2020/12/fun-with-chi-squared/ | |
# | |
# Anders Wallin, 2020-12-29 | |
# | |
import numpy as np | |
import scipy.stats | |
import scipy.special | |
import math | |
import matplotlib.pyplot as plt | |
import time | |
######################################################################## | |
# numerical recipes in python | |
# from https://github.com/mauriceling/dose/blob/master/dose/copads/nrpy.py | |
# | |
# Copyright (c) Maurice H.T. Ling <mauriceling@acm.org> | |
# Date created: 19th March 2008 | |
# License: Unless specified otherwise, all parts of this package, except | |
# those adapted, are covered under Python Software Foundation License | |
# version 2. | |
# | |
# Note: Stable32 source uses ITMAX = 100 | |
ITMAX = 100 | |
EPS = 3.0e-7 | |
def gammp(a, x): | |
""" | |
Gamma incomplete function, P(a,x). | |
P(a,x) = (1/gammln(a)) * integral(0, x, (e^-t)*(t^(a-1)), dt) | |
Depend: gser, gcf, gammln | |
@see: NRP 6.2 | |
@see: Ling, MHT. 2009. Compendium of Distributions, I: Beta, Binomial, Chi- | |
Square, F, Gamma, Geometric, Poisson, Student's t, and Uniform. The Python | |
Papers Source Codes 1:4 | |
@param a: float number | |
@param x: float number | |
@return: float number | |
@status: Tested function | |
@since: version 0.1 | |
""" | |
if (x < 0.0 or a <= 0.0): | |
raise ValueError('Bad value for a or x: %s, %s' % (a, x)) | |
if (x < a + 1.0): | |
out = gser(a, x)[0] | |
else: | |
out = 1.0 - gcf(a, x)[0] | |
#print "gammp(%f, %f) = %f "%(a, x, out) | |
#time.sleep(0.5) | |
return out | |
def gser(a, x, itmax=700, eps=3.e-7): | |
""" | |
Series approximation to the incomplete gamma function. | |
@see: http://mail.python.org/pipermail/python-list/2000-June/671838.html | |
@see: Ling, MHT. 2009. Compendium of Distributions, I: Beta, Binomial, Chi- | |
Square, F, Gamma, Geometric, Poisson, Student's t, and Uniform. The Python | |
Papers Source Codes 1:4 | |
@status: Tested function | |
@since: version 0.1 | |
""" | |
gln = gammln(a) | |
if (x < 0.0): | |
raise ValueError('Bad value for x: %s' % a) | |
if (x == 0.0): | |
return(0.0, 0.0) | |
ap = a | |
total = 1.0 / a | |
delta = total | |
n = 1 | |
while n <= ITMAX: | |
ap = ap + 1.0 | |
delta = delta * x / ap | |
total = total + delta | |
if (abs(delta) < abs(total) * eps): | |
return (total * math.exp(-x + a * math.log(x) - gln), gln) | |
n = n + 1 | |
raise MaxIterationsException('Maximum iterations reached: %s, %s' | |
% (abs(delta), abs(total) * eps)) | |
def gcf(a, x, itmax=200, eps=3.e-7): | |
""" | |
Continued fraction approx'n of the incomplete gamma function. | |
@see: http://mail.python.org/pipermail/python-list/2000-June/671838.html | |
@see: Ling, MHT. 2009. Compendium of Distributions, I: Beta, Binomial, Chi- | |
Square, F, Gamma, Geometric, Poisson, Student's t, and Uniform. The Python | |
Papers Source Codes 1:4 | |
@status: Tested function | |
@since: version 0.1 | |
""" | |
gln = gammln(a) | |
gold = 0.0 | |
a0 = 1.0 | |
a1 = x | |
b0 = 0.0 | |
b1 = 1.0 | |
fac = 1.0 | |
n = 1 | |
while n <= ITMAX: | |
an = n | |
ana = an - a | |
a0 = (a1 + a0 * ana) * fac | |
b0 = (b1 + b0 * ana) * fac | |
anf = an * fac | |
a1 = x * a0 + anf * a1 | |
b1 = x * b0 + anf * b1 | |
if (a1 != 0.0): | |
fac = 1.0 / a1 | |
g = b1 * fac | |
if (abs((g - gold) / g) < eps): | |
return (g * math.exp(-x + a * math.log(x) - gln), gln) | |
gold = g | |
n = n + 1 | |
raise MaxIterationsException('Maximum iterations reached: %s' | |
% abs((g - gold) / g)) | |
def gammln(n): | |
""" | |
Complete Gamma function. | |
@see: NRP 6.1 | |
@see: http://mail.python.org/pipermail/python-list/2000-June/671838.html | |
@see: Ling, MHT. 2009. Compendium of Distributions, I: Beta, Binomial, Chi- | |
Square, F, Gamma, Geometric, Poisson, Student's t, and Uniform. The Python | |
Papers Source Codes 1:4 | |
@param n: float number | |
@return: float number | |
@status: Tested function | |
@since: version 0.1 | |
""" | |
gammln_cof = [76.18009173, -86.50532033, 24.01409822, | |
-1.231739516e0, 0.120858003e-2, -0.536382e-5] | |
x = n - 1.0 | |
tmp = x + 5.5 | |
tmp = (x + 0.5) * math.log(tmp) - tmp | |
ser = 1.0 | |
for j in range(6): | |
x = x + 1. | |
ser = ser + gammln_cof[j] / x | |
return tmp + math.log(2.50662827465 * ser) | |
# numerical recipes in python | |
######################################################################## | |
######################################################################## | |
# Alternative implementation of Chi-Squared function in Stable32 | |
# This is possibly used in the Windows executable (?) | |
# | |
# Collected Algorithms from CACM, Vol. I, #209, | |
# D. Ibbetson and E. Brothers, 1963. | |
# | |
# c-source: CNP.C | |
# Python code AW2020-12-28 | |
def CalcNormalProb(x): | |
if (x == 0.0): | |
z = 0.0; | |
else: | |
y = abs(x) / 2.0; | |
if (y >= 3.0): | |
z = 1.0; | |
else: | |
if (y < 1.0): | |
w = y * y; | |
z = ((((((((0.000124818987 * w | |
-.001075204047) * w + .005198775019) * w | |
-.019198292004) * w + .059054035642) * w | |
-.151968751364) * w + .319152932694) * w | |
-.531923007300) * w + .797884560593) * y * 2.0; | |
else: | |
y = y - 2.0; | |
z = (((((((((((((-.000045255659 * y | |
+.000152529290) * y - .000019538132) * y | |
-.000676904986) * y + .001390604284) * y | |
-.000794620820) * y - .002034254874) * y | |
+.006549791214) * y - .010557625006) * y | |
+.011630447319) * y - .009279453341) * y | |
+.005353579108) * y - .002141268741) * y | |
+.000535310849) * y + .999936657524; | |
if (x > 0.0): | |
return ((z + 1.0) / 2.0); | |
else: | |
return ((1.0 - z) / 2.0); | |
# Collected Algorithms from CACM, Vol. I, #299, | |
# I.D. Hill and M.C. Pike, 1965. | |
# | |
# c-source: CCSP.C | |
# python implementation AW2020-12-28 | |
def CalcChiSqrProb(x, edf): | |
#int f; | |
#BOOL even; | |
bigx=False | |
#float chiprob; | |
#double a, c, e, y, z, s; | |
if(x > 1416): # /* avoid O/F of exp(-0.5*x) < DBL_MIN / 2.225e-308 */ | |
bigx=True | |
f = int( edf ) | |
if ( (x < 0.0) or (f < 1.0) ): | |
return (-1.0) # /* error code */ | |
a = .5 * x; | |
even = bool(f % 2); | |
even = not even; | |
if (even or f > 2 and (not bigx)): | |
y = np.exp(-a); | |
if (even): | |
s = y; | |
else: | |
s = 2.0 * CalcNormalProb( -1.0*np.sqrt(x) ); # cumulative normal distribution | |
if (f > 2): | |
x = (0.5 * (edf - 1.0)); | |
if (even): | |
z = 1.0; | |
else: | |
z = .5; | |
if (bigx): | |
if even: | |
e = 0.0 | |
else: | |
e = .572364942925 | |
# e = (even) ? 0.0 : .572364942925; | |
c = np.log(a); | |
#for ( ; z <= x; z += 1.0) | |
while z<=x: | |
e = np.log(z) + e; | |
s = np.exp(c * z - a - e) + s; | |
z += 1.0 | |
chiprob = s; | |
else: | |
if even: | |
e = 1.0 | |
else: | |
e = .564189583548 / np.sqrt(a); | |
#e = (even) ? 1.0 : .564189583548 / sqrt(a); | |
c = 0.0; | |
#for ( ; z <= x; z += 1.0) | |
while z <= x: | |
e = e * a / z; | |
c += e; | |
z += 1.0 | |
chiprob = (c * y + s); | |
else: | |
chiprob = s; | |
#print "CalcChiSqrProb(%f, %f) = %f "%(x,edf, chiprob) | |
#time.sleep(0.5) | |
return 1.0-chiprob; | |
# /* end CalcChiSqrProb() */ | |
ci = scipy.special.erf(1/math.sqrt(2)) # = 0.68268949213708585 | |
ci_l = min(np.abs(ci), np.abs((ci-1))) / 2 | |
ci_h = 1 - ci_l | |
print ci_l, ci_h # [0.159, 0.841] | |
def chi2_ppf_stable32(p, edf, variant=True): | |
# chi-squared inverse cumulative distribution function | |
# as implemented in Stable32 source: CICS3.c | |
# python code by Anders Wallin, 2020-12-28 | |
# | |
# NOTE: there appears to be a typo on the a1-coefficient | |
# from the A&S book, 26.2.22. | |
if edf>100: | |
# Abramowitz & Stegun, Handbook of Mathematical | |
# Functions, Sections 26.2.22 & 26.4.17 | |
a0=2.30753 # Coefficients from A&S 26.2.22 | |
a1=0.27601 # typo in the Stable32 source code!? | |
#a1=0.27061 # a1 coefficient in A&S 26.2.22 | |
b1=0.99229 | |
b2=0.04481 | |
#p=1-p | |
p1 = min(p, 1-p) | |
t = np.sqrt(-2.0*np.log(p1)) # // A&S 26.2.22 | |
n = t-((a0+(a1*t))/(1.0+(t*(b1+(t*b2))))) # // " | |
b=2.0/(9.0*edf) # // A&S 26.4.17 | |
if ((p-0.5)<0): | |
s=-1.0 # // Greenhall revision | |
elif ((p-0.5)>0): | |
s=1.0 # // 1 function for | |
else: | |
s=0.0 # // both tails | |
y=1-b+(s*n*np.sqrt(b)) # // A&S 26.4.17 | |
x=edf*pow(y, 3.0) # // " | |
return x | |
else: # edf < =100 | |
# see e.g. https://daviddeley.com/random/code.htm | |
# Press et al., Numerical Recipes | |
# Iterative solution for edf<=100 | |
p=1-p | |
print p | |
x = edf+(0.5-p)*edf*0.5 # /* start value for chi squared */ | |
if variant: | |
prob = gammp(edf/2, (float)(x/2)) # Newer function, from Numerical Recipes | |
else: | |
prob = CalcChiSqrProb(x, edf) # Older (?), possibly in use in the Windows binary? | |
#prob=1-prob | |
div = 0.1 | |
while( abs((prob-p)/p)>0.0001): # /* accuracy criterion */ | |
if(prob-p>0): # /* save sign of error */ | |
sign=1.0 | |
else: | |
sign=-1.0 | |
x += edf*(prob-p)/div # /* iteration increment */ | |
if(x>0): # /* make sure argument is + */ | |
if variant: | |
prob = gammp(edf/2, (x/2)) | |
else: | |
prob = CalcChiSqrProb(x, edf) | |
prob = 1-prob | |
else: # /* otherwise restore it & reduce increment */ | |
x -= edf*(prob-p)/div | |
div*=2 | |
if(((prob-p)/sign)<0): # /* did sign of error reverse? */ | |
div*=2 # /* reduce increment if it did */ | |
return x | |
plt.figure() | |
# loop through EDFs and probabilities for plotting | |
edfs = [] | |
for edf in [2, 4, 8, 16, 32, 64, 101, 150, 200, 250, 300,500, 1000, 3000, 10000]: | |
ps=[] | |
chis=[] | |
chis_32 = [] | |
edfs.append(edf) | |
for p in np.linspace(0.1,0.9,100): | |
chi2 = scipy.stats.chi2.ppf( p, edf ) | |
ps.append(p) | |
chis.append(chi2/edf) # normalize with edf | |
chis_32.append( chi2_ppf_stable32(p, edf, variant=False) / edf ) | |
chis = np.array(chis) | |
chis_32 = np.array(chis_32) | |
plt.subplot(1,3,1) | |
plt.plot(ps, chis, label='scipy EDF=%d'%edf) | |
plt.plot(ps, chis_32,'--', label='Stable32 EDF=%d'%edf) | |
plt.subplot(1,3,2) | |
plt.plot(ps, chis_32-chis,'-', label='EDF=%d'%edf) | |
# estimate error in CI | |
# CIs computed as | |
# var_h = float(edf) * variance / chi2_l | |
ci_scipy = np.sqrt( np.divide( float(edf) , edf*chis) ) | |
ci_stable32 = np.sqrt( np.divide( float(edf) , edf*chis_32) ) | |
ci_err = np.divide(ci_stable32, ci_scipy)-1.0 # relative error in CI | |
plt.subplot(1,3,3) | |
plt.plot( ps,ci_err, label='EDF=%d'%edf ) | |
plt.subplot(1,3,3) | |
plt.ylabel('Relative error in CI of xDEV') | |
plt.xlabel('p') | |
plt.title('Relative error in Stable32 CIs vs. allantools') | |
plt.plot([ci_l, ci_l],[-0.001, 0.001],'-.',label='1-sigma lower p') | |
plt.plot([ci_h, ci_h],[-0.001, 0.001],'-.',label='1-sigma upper p') | |
plt.ylim((-0.0005, 0.0005)) | |
plt.grid() | |
plt.legend() | |
plt.subplot(1,3,1) | |
plt.title('Chi-squared inverse cumulative distribution.\nscipy.stats.chi2.ppf() vs Stable32 implementation\nAW2020-12-28') | |
plt.xlabel('p') | |
plt.ylabel('Chi-squared / EDF') | |
plt.grid() | |
plt.legend() | |
plt.subplot(1,3,2) | |
plt.title('Difference: Stable32 - scipy.stats.chi2.ppf()') | |
plt.plot([ci_l, ci_l],[-0.001, 0.001],'-.',label='1-sigma lower p') | |
plt.plot([ci_h, ci_h],[-0.001, 0.001],'-.',label='1-sigma upper p') | |
plt.ylim((-0.001, 0.001)) | |
plt.xlabel('p') | |
plt.ylabel('Stable32 - scipy.stats.chi2.ppf, Chi-squared / EDF') | |
plt.grid() | |
plt.legend() | |
# figure with expected CI error vs edf, for 1-sigma CI | |
# loop through EDFs and probabilities for plotting | |
edfs = np.logspace(0.3,5,300) | |
ci_hi_err=[] | |
ci_lo_err=[] | |
for edf in edfs: | |
ps=[] | |
chis=[] | |
chis_32 = [] | |
ci_hi_scipy = np.sqrt( edf/scipy.stats.chi2.ppf( ci_l, edf ) ) | |
ci_hi_32 = np.sqrt( edf/chi2_ppf_stable32( ci_l, edf, variant=True) ) | |
ci_hi_err.append( ci_hi_32/ci_hi_scipy - 1.0) | |
ci_lo_scipy = np.sqrt( edf/scipy.stats.chi2.ppf( ci_h, edf ) ) | |
ci_lo_32 = np.sqrt( edf/chi2_ppf_stable32( ci_h, edf, variant=True) ) | |
ci_lo_err.append( ci_lo_32/ci_lo_scipy - 1.0) | |
plt.figure() | |
plt.semilogx( edfs, ci_hi_err, 'b-', label='1-sigma hi CI') | |
plt.semilogx( edfs, ci_lo_err, 'r-', label='1-sigma lo CI') | |
# data from some Allantools tests | |
# phase.dat ADEV octave results, ci_demo.py file | |
# Fromat (relative errors) | |
# AF EDF min_CI dev max_CI | |
adev_phasedat = [ (1, 782, 0.000125, -0.000006, -0.000135), | |
(2, 356, 0.000195, -0.000008, -0.000221), | |
(4, 171, 0.000283, 0.000019, -0.000305) , | |
(8, 84, -0.000007, -0.000044, -0.000011), | |
(16, 41, 0.000009, -0.000005, -0.000024), | |
(32, 20, 0.000002, 0.000001, 0.000043), | |
(64, 9, -0.000002, 0.000003, 0.000019), | |
(128, 4, 0.000004, -0.000006, 0.000123), | |
] | |
plt.plot( [x[1] for x in adev_phasedat], [x[2] for x in adev_phasedat],'ro',label='phase.dat ADEV lower CI') | |
plt.plot( [x[1] for x in adev_phasedat], [x[3] for x in adev_phasedat],'ko',label='phase.dat ADEV') | |
plt.plot( [x[1] for x in adev_phasedat], [x[4] for x in adev_phasedat],'bo',label='phase.dat ADEV upper CI') | |
# CS5071A test dataset, decade results | |
# Format: | |
# EDF min_CI dev max_CI | |
adev_5071a = [(286451, 0.000019, -0.000003,-0.000001), | |
(143225, 0.000004, 0.000012, 0.000009), | |
(71612, 0.000018, -0.000006, -0.000013), | |
(28644, 0.000018, -0.000007, -0.000023), | |
(15080, 0.000015, -0.000018, -0.000030), | |
(7486, 0.000047, 0.000003, -0.000046), | |
(2973, 0.000068, 0.000010, -0.000069), | |
(1855, 0.000071, 0.000009, -0.000097), | |
(927, 0.000092, -0.000023, -0.000151), | |
(370, 0.000202, -0.000002, -0.000220), | |
(184, 0.000290, -0.000009, -0.000306), | |
(92, 0.000011, -0.000010, -0.000017), | |
(36, 0.000015, 0.000018, 0.000039), | |
(23, 0.000027, -0.000029, 0.000032), | |
(10, -0.000007, 0.000017, -0.000027), | |
(3, 0.000018, -0.000002, -0.000199), | |
] | |
plt.plot( [x[0] for x in adev_5071a], [x[1] for x in adev_5071a],'rd',label='5071a ADEV lower CI') | |
plt.plot( [x[0] for x in adev_5071a], [x[2] for x in adev_5071a],'kd',label='5071a ADEV') | |
plt.plot( [x[0] for x in adev_5071a], [x[3] for x in adev_5071a],'bd',label='5071a ADEV upper CI') | |
plt.xlabel('EDF') | |
plt.ylabel('Relative error in CI') | |
plt.grid() | |
plt.legend() | |
plt.title('Expected relative error in Stable32 CIs vs. allantools\n AW2020-12-29') | |
plt.show() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment