shuhaowu/gist:6177897

## gistfile1.py
from __future__ import division
from math import sqrt, log, pi, cos, sin, expm1

_a1 = -3.969683028665376e+01
_a2 = 2.209460984245205e+02
_a3 = -2.759285104469687e+02
_a4 = 1.383577518672690e+02
_a5 = -3.066479806614716e+01
_a6 = 2.506628277459239e+00
_b1 = -5.447609879822406e+01
_b2 = 1.615858368580409e+02
_b3 = -1.556989798598866e+02
_b4 = 6.680131188771972e+01
_b5 = -1.328068155288572e+01
_c1 = -7.784894002430293e-03
_c2 = -3.223964580411365e-01
_c3 = -2.400758277161838e+00
_c4 = -2.549732539343734e+00
_c5 = 4.374664141464968e+00
_c6 = 2.938163982698783e+00
_d1 = 7.784695709041462e-03
_d2 = 3.224671290700398e-01
_d3 = 2.445134137142996e+00
_d4 = 3.754408661907416e+00
_p_low = 0.02425
_p_high = 1.0 - _p_low


def inv_norm_cdf(p):
    """Estimates the inverse cumulative distribution function for a
    normal distribution.

    Args:
        p: the lower tail probability
    """
    if 0 < p < _p_low:
        # rational approximation for the lower region
        q = sqrt(-2 * log(p))
        x = (((((_c1*q+_c2)*q+_c3)*q+_c4)*q+_c5)*q+_c6) / ((((_d1*q+_d2)*q+_d3)*q+_d4)*q+1)
    elif _p_low <= p <= _p_high:
        # rational approximation for the central region
        q = p - 0.5
        r = q * q
        x = (((((_a1*r+_a2)*r+_a3)*r+_a4)*r+_a5)*r+_a6)*q / (((((_b1*r+_b2)*r+_b3)*r+_b4)*r+_b5)*r+1.0)
    else:
        # rational approximation for the upper region
        q = sqrt(-2.0 * log(1.0 - p))
        x = -(((((_c1*q+_c2)*q+_c3)*q+_c4)*q+_c5)*q+_c6) / ((((_d1*q+_d2)*q+_d3)*q+_d4)*q+1.0)

    # Can't refine to maximum precision due to the lack of the erfc function.
    # See http://home.online.no/~pjacklam/notes/invnorm/ for details.
    return x


def tppf(p, df):
    """Calculates the percentage point function (ppf) from student t's
    distribution.

    Args:
        p: two tail probability.
        df: Degrees of freedom.

    Notes:
        Ported from http://www.sultanik.com/Quantile_function and
        http://svn.r-project.org/R/trunk/src/nmath/qt.c

        It is not exactly know if there is any problems with this
        implementation as I simplify fixed the implementation from
        sultanik.com with the R implementation without applying a lot of
        fixes from the R code as a) I'm not sure what they do, b) the
        approximation seem to be okay compared against scipy's
        implementation, and c) i'm not sure how i can rip out the
        function from all the R stuff.

        I've tested the errors from a q value of 0.50 to 1.0 (the valid
        range, increment of 0.01) with df from 1 to 149. The average err
        is 5.28e-6 max error being 0.00277 and a stddev of 7.71e-5.
    """
    if p > 1 or p <= 0:
        raise ValueError("p value must be (0, 1]")

    if df < 1:
        raise ValueError("df value must be >= 1")

    if df == 1:
        p *= pi / 2
        return cos(p) / sin(p)


    a = 1.0 / (df - 0.5)
    b = 48 / (a * a)
    c = ((20700 * a / b - 98) * a - 16) * a + 96.36
    d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * pi / 2) * df

    x = d * p
    y = x ** (2.0 / df)
    if y > a + 0.05:
        # asymptotic inverse expansion about the normal
        x = inv_norm_cdf(p * 0.5)
        y = x * x
        if df < 5:
            c += 0.3 * (df - 4.5) * (x + 0.6)
        c = (((0.5 * d * x - 0.5) * x - 7.0) * x - 2.0) * x + b + c
        y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b + 1.0) * x
        y = expm1(a * y * y)
    else:
        y = ((1.0/(((df + 6.0)/(df * y) - 0.089 * d - 0.822) * (df+2.0) * 3.0) + 0.5 / (df+4.0))*y - 1.0) * (df + 1.0) / (df + 2.0) + 1.0 / y

    return sqrt(df * y)


def test():
  from scipy.stats import t
  from numpy import std, mean
  """ Tests the function for errors against scipy's implementation """
  err = []
  for q in xrange(500, 1000):
    q /= 1000
    for df in xrange(1, 150):
      err.append(abs(tppf((1 - q) * 2, df) - t.ppf(q, df)))

  print "max", max(err)
  print "mean", mean(err)
  print "stddev", std(err)
	from __future__ import division
	from math import sqrt, log, pi, cos, sin, expm1

	_a1 = -3.969683028665376e+01
	_a2 = 2.209460984245205e+02
	_a3 = -2.759285104469687e+02
	_a4 = 1.383577518672690e+02
	_a5 = -3.066479806614716e+01
	_a6 = 2.506628277459239e+00
	_b1 = -5.447609879822406e+01
	_b2 = 1.615858368580409e+02
	_b3 = -1.556989798598866e+02
	_b4 = 6.680131188771972e+01
	_b5 = -1.328068155288572e+01
	_c1 = -7.784894002430293e-03
	_c2 = -3.223964580411365e-01
	_c3 = -2.400758277161838e+00
	_c4 = -2.549732539343734e+00
	_c5 = 4.374664141464968e+00
	_c6 = 2.938163982698783e+00
	_d1 = 7.784695709041462e-03
	_d2 = 3.224671290700398e-01
	_d3 = 2.445134137142996e+00
	_d4 = 3.754408661907416e+00
	_p_low = 0.02425
	_p_high = 1.0 - _p_low


	def inv_norm_cdf(p):
	"""Estimates the inverse cumulative distribution function for a
	normal distribution.

	Args:
	p: the lower tail probability
	"""
	if 0 < p < _p_low:
	# rational approximation for the lower region
	q = sqrt(-2 * log(p))
	x = (((((_c1q+_c2)q+_c3)q+_c4)q+_c5)q+_c6) / ((((_d1q+_d2)q+_d3)q+_d4)*q+1)
	elif _p_low <= p <= _p_high:
	# rational approximation for the central region
	q = p - 0.5
	r = q * q
	x = (((((_a1r+_a2)r+_a3)r+_a4)r+_a5)r+_a6)q / (((((_b1r+_b2)r+_b3)r+_b4)r+_b5)*r+1.0)
	else:
	# rational approximation for the upper region
	q = sqrt(-2.0 * log(1.0 - p))
	x = -(((((_c1q+_c2)q+_c3)q+_c4)q+_c5)q+_c6) / ((((_d1q+_d2)q+_d3)q+_d4)*q+1.0)

	# Can't refine to maximum precision due to the lack of the erfc function.
	# See http://home.online.no/~pjacklam/notes/invnorm/ for details.
	return x


	def tppf(p, df):
	"""Calculates the percentage point function (ppf) from student t's
	distribution.

	Args:
	p: two tail probability.
	df: Degrees of freedom.

	Notes:
	Ported from http://www.sultanik.com/Quantile_function and
	http://svn.r-project.org/R/trunk/src/nmath/qt.c

	It is not exactly know if there is any problems with this
	implementation as I simplify fixed the implementation from
	sultanik.com with the R implementation without applying a lot of
	fixes from the R code as a) I'm not sure what they do, b) the
	approximation seem to be okay compared against scipy's
	implementation, and c) i'm not sure how i can rip out the
	function from all the R stuff.

	I've tested the errors from a q value of 0.50 to 1.0 (the valid
	range, increment of 0.01) with df from 1 to 149. The average err
	is 5.28e-6 max error being 0.00277 and a stddev of 7.71e-5.
	"""
	if p > 1 or p <= 0:
	raise ValueError("p value must be (0, 1]")

	if df < 1:
	raise ValueError("df value must be >= 1")

	if df == 1:
	p *= pi / 2
	return cos(p) / sin(p)


	a = 1.0 / (df - 0.5)
	b = 48 / (a * a)
	c = ((20700 * a / b - 98) * a - 16) * a + 96.36
	d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * pi / 2) * df

	x = d * p
	y = x ** (2.0 / df)
	if y > a + 0.05:
	# asymptotic inverse expansion about the normal
	x = inv_norm_cdf(p * 0.5)
	y = x * x
	if df < 5:
	c += 0.3 * (df - 4.5) * (x + 0.6)
	c = (((0.5 * d * x - 0.5) * x - 7.0) * x - 2.0) * x + b + c
	y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b + 1.0) * x
	y = expm1(a * y * y)
	else:
	y = ((1.0/(((df + 6.0)/(df * y) - 0.089 * d - 0.822) * (df+2.0) * 3.0) + 0.5 / (df+4.0))y - 1.0) (df + 1.0) / (df + 2.0) + 1.0 / y

	return sqrt(df * y)



	def test():
	from scipy.stats import t
	from numpy import std, mean
	""" Tests the function for errors against scipy's implementation """
	err = []
	for q in xrange(500, 1000):
	q /= 1000
	for df in xrange(1, 150):
	err.append(abs(tppf((1 - q) * 2, df) - t.ppf(q, df)))

	print "max", max(err)
	print "mean", mean(err)
	print "stddev", std(err)