Skip to content

Instantly share code, notes, and snippets.

@aewallin

aewallin/chi2_stable32.py

Last active Dec 31, 2020
Embed
What would you like to do?
Comparison between scipy and approximations for inverse chi-squared cumulative distribution
# The code below relates to computing the inverse chi-squared cumulative distribution,
# used e.g. in Stable32 and allantools for computing confidence intervals.
# This blog post has the images produced by the code, and some comments
# http://www.anderswallin.net/2020/12/fun-with-chi-squared/
#
# Anders Wallin, 2020-12-29
#
import numpy as np
import scipy.stats
import scipy.special
import math
import matplotlib.pyplot as plt
import time
########################################################################
# numerical recipes in python
# from https://github.com/mauriceling/dose/blob/master/dose/copads/nrpy.py
#
# Copyright (c) Maurice H.T. Ling <mauriceling@acm.org>
# Date created: 19th March 2008
# License: Unless specified otherwise, all parts of this package, except
# those adapted, are covered under Python Software Foundation License
# version 2.
#
# Note: Stable32 source uses ITMAX = 100
ITMAX = 100
EPS = 3.0e-7
def gammp(a, x):
"""
Gamma incomplete function, P(a,x).
P(a,x) = (1/gammln(a)) * integral(0, x, (e^-t)*(t^(a-1)), dt)
Depend: gser, gcf, gammln
@see: NRP 6.2
@see: Ling, MHT. 2009. Compendium of Distributions, I: Beta, Binomial, Chi-
Square, F, Gamma, Geometric, Poisson, Student's t, and Uniform. The Python
Papers Source Codes 1:4
@param a: float number
@param x: float number
@return: float number
@status: Tested function
@since: version 0.1
"""
if (x < 0.0 or a <= 0.0):
raise ValueError('Bad value for a or x: %s, %s' % (a, x))
if (x < a + 1.0):
out = gser(a, x)[0]
else:
out = 1.0 - gcf(a, x)[0]
#print "gammp(%f, %f) = %f "%(a, x, out)
#time.sleep(0.5)
return out
def gser(a, x, itmax=700, eps=3.e-7):
"""
Series approximation to the incomplete gamma function.
@see: http://mail.python.org/pipermail/python-list/2000-June/671838.html
@see: Ling, MHT. 2009. Compendium of Distributions, I: Beta, Binomial, Chi-
Square, F, Gamma, Geometric, Poisson, Student's t, and Uniform. The Python
Papers Source Codes 1:4
@status: Tested function
@since: version 0.1
"""
gln = gammln(a)
if (x < 0.0):
raise ValueError('Bad value for x: %s' % a)
if (x == 0.0):
return(0.0, 0.0)
ap = a
total = 1.0 / a
delta = total
n = 1
while n <= ITMAX:
ap = ap + 1.0
delta = delta * x / ap
total = total + delta
if (abs(delta) < abs(total) * eps):
return (total * math.exp(-x + a * math.log(x) - gln), gln)
n = n + 1
raise MaxIterationsException('Maximum iterations reached: %s, %s'
% (abs(delta), abs(total) * eps))
def gcf(a, x, itmax=200, eps=3.e-7):
"""
Continued fraction approx'n of the incomplete gamma function.
@see: http://mail.python.org/pipermail/python-list/2000-June/671838.html
@see: Ling, MHT. 2009. Compendium of Distributions, I: Beta, Binomial, Chi-
Square, F, Gamma, Geometric, Poisson, Student's t, and Uniform. The Python
Papers Source Codes 1:4
@status: Tested function
@since: version 0.1
"""
gln = gammln(a)
gold = 0.0
a0 = 1.0
a1 = x
b0 = 0.0
b1 = 1.0
fac = 1.0
n = 1
while n <= ITMAX:
an = n
ana = an - a
a0 = (a1 + a0 * ana) * fac
b0 = (b1 + b0 * ana) * fac
anf = an * fac
a1 = x * a0 + anf * a1
b1 = x * b0 + anf * b1
if (a1 != 0.0):
fac = 1.0 / a1
g = b1 * fac
if (abs((g - gold) / g) < eps):
return (g * math.exp(-x + a * math.log(x) - gln), gln)
gold = g
n = n + 1
raise MaxIterationsException('Maximum iterations reached: %s'
% abs((g - gold) / g))
def gammln(n):
"""
Complete Gamma function.
@see: NRP 6.1
@see: http://mail.python.org/pipermail/python-list/2000-June/671838.html
@see: Ling, MHT. 2009. Compendium of Distributions, I: Beta, Binomial, Chi-
Square, F, Gamma, Geometric, Poisson, Student's t, and Uniform. The Python
Papers Source Codes 1:4
@param n: float number
@return: float number
@status: Tested function
@since: version 0.1
"""
gammln_cof = [76.18009173, -86.50532033, 24.01409822,
-1.231739516e0, 0.120858003e-2, -0.536382e-5]
x = n - 1.0
tmp = x + 5.5
tmp = (x + 0.5) * math.log(tmp) - tmp
ser = 1.0
for j in range(6):
x = x + 1.
ser = ser + gammln_cof[j] / x
return tmp + math.log(2.50662827465 * ser)
# numerical recipes in python
########################################################################
########################################################################
# Alternative implementation of Chi-Squared function in Stable32
# This is possibly used in the Windows executable (?)
#
# Collected Algorithms from CACM, Vol. I, #209,
# D. Ibbetson and E. Brothers, 1963.
#
# c-source: CNP.C
# Python code AW2020-12-28
def CalcNormalProb(x):
if (x == 0.0):
z = 0.0;
else:
y = abs(x) / 2.0;
if (y >= 3.0):
z = 1.0;
else:
if (y < 1.0):
w = y * y;
z = ((((((((0.000124818987 * w
-.001075204047) * w + .005198775019) * w
-.019198292004) * w + .059054035642) * w
-.151968751364) * w + .319152932694) * w
-.531923007300) * w + .797884560593) * y * 2.0;
else:
y = y - 2.0;
z = (((((((((((((-.000045255659 * y
+.000152529290) * y - .000019538132) * y
-.000676904986) * y + .001390604284) * y
-.000794620820) * y - .002034254874) * y
+.006549791214) * y - .010557625006) * y
+.011630447319) * y - .009279453341) * y
+.005353579108) * y - .002141268741) * y
+.000535310849) * y + .999936657524;
if (x > 0.0):
return ((z + 1.0) / 2.0);
else:
return ((1.0 - z) / 2.0);
# Collected Algorithms from CACM, Vol. I, #299,
# I.D. Hill and M.C. Pike, 1965.
#
# c-source: CCSP.C
# python implementation AW2020-12-28
def CalcChiSqrProb(x, edf):
#int f;
#BOOL even;
bigx=False
#float chiprob;
#double a, c, e, y, z, s;
if(x > 1416): # /* avoid O/F of exp(-0.5*x) < DBL_MIN / 2.225e-308 */
bigx=True
f = int( edf )
if ( (x < 0.0) or (f < 1.0) ):
return (-1.0) # /* error code */
a = .5 * x;
even = bool(f % 2);
even = not even;
if (even or f > 2 and (not bigx)):
y = np.exp(-a);
if (even):
s = y;
else:
s = 2.0 * CalcNormalProb( -1.0*np.sqrt(x) ); # cumulative normal distribution
if (f > 2):
x = (0.5 * (edf - 1.0));
if (even):
z = 1.0;
else:
z = .5;
if (bigx):
if even:
e = 0.0
else:
e = .572364942925
# e = (even) ? 0.0 : .572364942925;
c = np.log(a);
#for ( ; z <= x; z += 1.0)
while z<=x:
e = np.log(z) + e;
s = np.exp(c * z - a - e) + s;
z += 1.0
chiprob = s;
else:
if even:
e = 1.0
else:
e = .564189583548 / np.sqrt(a);
#e = (even) ? 1.0 : .564189583548 / sqrt(a);
c = 0.0;
#for ( ; z <= x; z += 1.0)
while z <= x:
e = e * a / z;
c += e;
z += 1.0
chiprob = (c * y + s);
else:
chiprob = s;
#print "CalcChiSqrProb(%f, %f) = %f "%(x,edf, chiprob)
#time.sleep(0.5)
return 1.0-chiprob;
# /* end CalcChiSqrProb() */
ci = scipy.special.erf(1/math.sqrt(2)) # = 0.68268949213708585
ci_l = min(np.abs(ci), np.abs((ci-1))) / 2
ci_h = 1 - ci_l
print ci_l, ci_h # [0.159, 0.841]
def chi2_ppf_stable32(p, edf, variant=True):
# chi-squared inverse cumulative distribution function
# as implemented in Stable32 source: CICS3.c
# python code by Anders Wallin, 2020-12-28
#
# NOTE: there appears to be a typo on the a1-coefficient
# from the A&S book, 26.2.22.
if edf>100:
# Abramowitz & Stegun, Handbook of Mathematical
# Functions, Sections 26.2.22 & 26.4.17
a0=2.30753 # Coefficients from A&S 26.2.22
a1=0.27601 # typo in the Stable32 source code!?
#a1=0.27061 # a1 coefficient in A&S 26.2.22
b1=0.99229
b2=0.04481
#p=1-p
p1 = min(p, 1-p)
t = np.sqrt(-2.0*np.log(p1)) # // A&S 26.2.22
n = t-((a0+(a1*t))/(1.0+(t*(b1+(t*b2))))) # // "
b=2.0/(9.0*edf) # // A&S 26.4.17
if ((p-0.5)<0):
s=-1.0 # // Greenhall revision
elif ((p-0.5)>0):
s=1.0 # // 1 function for
else:
s=0.0 # // both tails
y=1-b+(s*n*np.sqrt(b)) # // A&S 26.4.17
x=edf*pow(y, 3.0) # // "
return x
else: # edf < =100
# see e.g. https://daviddeley.com/random/code.htm
# Press et al., Numerical Recipes
# Iterative solution for edf<=100
p=1-p
print p
x = edf+(0.5-p)*edf*0.5 # /* start value for chi squared */
if variant:
prob = gammp(edf/2, (float)(x/2)) # Newer function, from Numerical Recipes
else:
prob = CalcChiSqrProb(x, edf) # Older (?), possibly in use in the Windows binary?
#prob=1-prob
div = 0.1
while( abs((prob-p)/p)>0.0001): # /* accuracy criterion */
if(prob-p>0): # /* save sign of error */
sign=1.0
else:
sign=-1.0
x += edf*(prob-p)/div # /* iteration increment */
if(x>0): # /* make sure argument is + */
if variant:
prob = gammp(edf/2, (x/2))
else:
prob = CalcChiSqrProb(x, edf)
prob = 1-prob
else: # /* otherwise restore it & reduce increment */
x -= edf*(prob-p)/div
div*=2
if(((prob-p)/sign)<0): # /* did sign of error reverse? */
div*=2 # /* reduce increment if it did */
return x
plt.figure()
# loop through EDFs and probabilities for plotting
edfs = []
for edf in [2, 4, 8, 16, 32, 64, 101, 150, 200, 250, 300,500, 1000, 3000, 10000]:
ps=[]
chis=[]
chis_32 = []
edfs.append(edf)
for p in np.linspace(0.1,0.9,100):
chi2 = scipy.stats.chi2.ppf( p, edf )
ps.append(p)
chis.append(chi2/edf) # normalize with edf
chis_32.append( chi2_ppf_stable32(p, edf, variant=False) / edf )
chis = np.array(chis)
chis_32 = np.array(chis_32)
plt.subplot(1,3,1)
plt.plot(ps, chis, label='scipy EDF=%d'%edf)
plt.plot(ps, chis_32,'--', label='Stable32 EDF=%d'%edf)
plt.subplot(1,3,2)
plt.plot(ps, chis_32-chis,'-', label='EDF=%d'%edf)
# estimate error in CI
# CIs computed as
# var_h = float(edf) * variance / chi2_l
ci_scipy = np.sqrt( np.divide( float(edf) , edf*chis) )
ci_stable32 = np.sqrt( np.divide( float(edf) , edf*chis_32) )
ci_err = np.divide(ci_stable32, ci_scipy)-1.0 # relative error in CI
plt.subplot(1,3,3)
plt.plot( ps,ci_err, label='EDF=%d'%edf )
plt.subplot(1,3,3)
plt.ylabel('Relative error in CI of xDEV')
plt.xlabel('p')
plt.title('Relative error in Stable32 CIs vs. allantools')
plt.plot([ci_l, ci_l],[-0.001, 0.001],'-.',label='1-sigma lower p')
plt.plot([ci_h, ci_h],[-0.001, 0.001],'-.',label='1-sigma upper p')
plt.ylim((-0.0005, 0.0005))
plt.grid()
plt.legend()
plt.subplot(1,3,1)
plt.title('Chi-squared inverse cumulative distribution.\nscipy.stats.chi2.ppf() vs Stable32 implementation\nAW2020-12-28')
plt.xlabel('p')
plt.ylabel('Chi-squared / EDF')
plt.grid()
plt.legend()
plt.subplot(1,3,2)
plt.title('Difference: Stable32 - scipy.stats.chi2.ppf()')
plt.plot([ci_l, ci_l],[-0.001, 0.001],'-.',label='1-sigma lower p')
plt.plot([ci_h, ci_h],[-0.001, 0.001],'-.',label='1-sigma upper p')
plt.ylim((-0.001, 0.001))
plt.xlabel('p')
plt.ylabel('Stable32 - scipy.stats.chi2.ppf, Chi-squared / EDF')
plt.grid()
plt.legend()
# figure with expected CI error vs edf, for 1-sigma CI
# loop through EDFs and probabilities for plotting
edfs = np.logspace(0.3,5,300)
ci_hi_err=[]
ci_lo_err=[]
for edf in edfs:
ps=[]
chis=[]
chis_32 = []
ci_hi_scipy = np.sqrt( edf/scipy.stats.chi2.ppf( ci_l, edf ) )
ci_hi_32 = np.sqrt( edf/chi2_ppf_stable32( ci_l, edf, variant=True) )
ci_hi_err.append( ci_hi_32/ci_hi_scipy - 1.0)
ci_lo_scipy = np.sqrt( edf/scipy.stats.chi2.ppf( ci_h, edf ) )
ci_lo_32 = np.sqrt( edf/chi2_ppf_stable32( ci_h, edf, variant=True) )
ci_lo_err.append( ci_lo_32/ci_lo_scipy - 1.0)
plt.figure()
plt.semilogx( edfs, ci_hi_err, 'b-', label='1-sigma hi CI')
plt.semilogx( edfs, ci_lo_err, 'r-', label='1-sigma lo CI')
# data from some Allantools tests
# phase.dat ADEV octave results, ci_demo.py file
# Fromat (relative errors)
# AF EDF min_CI dev max_CI
adev_phasedat = [ (1, 782, 0.000125, -0.000006, -0.000135),
(2, 356, 0.000195, -0.000008, -0.000221),
(4, 171, 0.000283, 0.000019, -0.000305) ,
(8, 84, -0.000007, -0.000044, -0.000011),
(16, 41, 0.000009, -0.000005, -0.000024),
(32, 20, 0.000002, 0.000001, 0.000043),
(64, 9, -0.000002, 0.000003, 0.000019),
(128, 4, 0.000004, -0.000006, 0.000123),
]
plt.plot( [x[1] for x in adev_phasedat], [x[2] for x in adev_phasedat],'ro',label='phase.dat ADEV lower CI')
plt.plot( [x[1] for x in adev_phasedat], [x[3] for x in adev_phasedat],'ko',label='phase.dat ADEV')
plt.plot( [x[1] for x in adev_phasedat], [x[4] for x in adev_phasedat],'bo',label='phase.dat ADEV upper CI')
# CS5071A test dataset, decade results
# Format:
# EDF min_CI dev max_CI
adev_5071a = [(286451, 0.000019, -0.000003,-0.000001),
(143225, 0.000004, 0.000012, 0.000009),
(71612, 0.000018, -0.000006, -0.000013),
(28644, 0.000018, -0.000007, -0.000023),
(15080, 0.000015, -0.000018, -0.000030),
(7486, 0.000047, 0.000003, -0.000046),
(2973, 0.000068, 0.000010, -0.000069),
(1855, 0.000071, 0.000009, -0.000097),
(927, 0.000092, -0.000023, -0.000151),
(370, 0.000202, -0.000002, -0.000220),
(184, 0.000290, -0.000009, -0.000306),
(92, 0.000011, -0.000010, -0.000017),
(36, 0.000015, 0.000018, 0.000039),
(23, 0.000027, -0.000029, 0.000032),
(10, -0.000007, 0.000017, -0.000027),
(3, 0.000018, -0.000002, -0.000199),
]
plt.plot( [x[0] for x in adev_5071a], [x[1] for x in adev_5071a],'rd',label='5071a ADEV lower CI')
plt.plot( [x[0] for x in adev_5071a], [x[2] for x in adev_5071a],'kd',label='5071a ADEV')
plt.plot( [x[0] for x in adev_5071a], [x[3] for x in adev_5071a],'bd',label='5071a ADEV upper CI')
plt.xlabel('EDF')
plt.ylabel('Relative error in CI')
plt.grid()
plt.legend()
plt.title('Expected relative error in Stable32 CIs vs. allantools\n AW2020-12-29')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment