Skip to content

Instantly share code, notes, and snippets.

@shuhaowu
Created August 7, 2013 19:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shuhaowu/6177897 to your computer and use it in GitHub Desktop.
Save shuhaowu/6177897 to your computer and use it in GitHub Desktop.
Student t's distribution approximation
from __future__ import division
from math import sqrt, log, pi, cos, sin, expm1
_a1 = -3.969683028665376e+01
_a2 = 2.209460984245205e+02
_a3 = -2.759285104469687e+02
_a4 = 1.383577518672690e+02
_a5 = -3.066479806614716e+01
_a6 = 2.506628277459239e+00
_b1 = -5.447609879822406e+01
_b2 = 1.615858368580409e+02
_b3 = -1.556989798598866e+02
_b4 = 6.680131188771972e+01
_b5 = -1.328068155288572e+01
_c1 = -7.784894002430293e-03
_c2 = -3.223964580411365e-01
_c3 = -2.400758277161838e+00
_c4 = -2.549732539343734e+00
_c5 = 4.374664141464968e+00
_c6 = 2.938163982698783e+00
_d1 = 7.784695709041462e-03
_d2 = 3.224671290700398e-01
_d3 = 2.445134137142996e+00
_d4 = 3.754408661907416e+00
_p_low = 0.02425
_p_high = 1.0 - _p_low
def inv_norm_cdf(p):
"""Estimates the inverse cumulative distribution function for a
normal distribution.
Args:
p: the lower tail probability
"""
if 0 < p < _p_low:
# rational approximation for the lower region
q = sqrt(-2 * log(p))
x = (((((_c1*q+_c2)*q+_c3)*q+_c4)*q+_c5)*q+_c6) / ((((_d1*q+_d2)*q+_d3)*q+_d4)*q+1)
elif _p_low <= p <= _p_high:
# rational approximation for the central region
q = p - 0.5
r = q * q
x = (((((_a1*r+_a2)*r+_a3)*r+_a4)*r+_a5)*r+_a6)*q / (((((_b1*r+_b2)*r+_b3)*r+_b4)*r+_b5)*r+1.0)
else:
# rational approximation for the upper region
q = sqrt(-2.0 * log(1.0 - p))
x = -(((((_c1*q+_c2)*q+_c3)*q+_c4)*q+_c5)*q+_c6) / ((((_d1*q+_d2)*q+_d3)*q+_d4)*q+1.0)
# Can't refine to maximum precision due to the lack of the erfc function.
# See http://home.online.no/~pjacklam/notes/invnorm/ for details.
return x
def tppf(p, df):
"""Calculates the percentage point function (ppf) from student t's
distribution.
Args:
p: two tail probability.
df: Degrees of freedom.
Notes:
Ported from http://www.sultanik.com/Quantile_function and
http://svn.r-project.org/R/trunk/src/nmath/qt.c
It is not exactly know if there is any problems with this
implementation as I simplify fixed the implementation from
sultanik.com with the R implementation without applying a lot of
fixes from the R code as a) I'm not sure what they do, b) the
approximation seem to be okay compared against scipy's
implementation, and c) i'm not sure how i can rip out the
function from all the R stuff.
I've tested the errors from a q value of 0.50 to 1.0 (the valid
range, increment of 0.01) with df from 1 to 149. The average err
is 5.28e-6 max error being 0.00277 and a stddev of 7.71e-5.
"""
if p > 1 or p <= 0:
raise ValueError("p value must be (0, 1]")
if df < 1:
raise ValueError("df value must be >= 1")
if df == 1:
p *= pi / 2
return cos(p) / sin(p)
a = 1.0 / (df - 0.5)
b = 48 / (a * a)
c = ((20700 * a / b - 98) * a - 16) * a + 96.36
d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * pi / 2) * df
x = d * p
y = x ** (2.0 / df)
if y > a + 0.05:
# asymptotic inverse expansion about the normal
x = inv_norm_cdf(p * 0.5)
y = x * x
if df < 5:
c += 0.3 * (df - 4.5) * (x + 0.6)
c = (((0.5 * d * x - 0.5) * x - 7.0) * x - 2.0) * x + b + c
y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b + 1.0) * x
y = expm1(a * y * y)
else:
y = ((1.0/(((df + 6.0)/(df * y) - 0.089 * d - 0.822) * (df+2.0) * 3.0) + 0.5 / (df+4.0))*y - 1.0) * (df + 1.0) / (df + 2.0) + 1.0 / y
return sqrt(df * y)
def test():
from scipy.stats import t
from numpy import std, mean
""" Tests the function for errors against scipy's implementation """
err = []
for q in xrange(500, 1000):
q /= 1000
for df in xrange(1, 150):
err.append(abs(tppf((1 - q) * 2, df) - t.ppf(q, df)))
print "max", max(err)
print "mean", mean(err)
print "stddev", std(err)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment