Created
March 4, 2012 17:43
-
-
Save josef-pkt/1974049 to your computer and use it in GitHub Desktop.
Checking numerical accuracy of linear regression against NIST StRD certified values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
"""Checking numerical Accuracy in Linear Regression | |
Created on Sat Feb 25 20:36:50 2012 | |
Author: Josef Perktold | |
License: BSD-3 | |
""" | |
import numpy as np | |
import scikits.statsmodels.api as sm | |
def log_relative_error(actual, desired):
    '''Calculate log relative error (LRE), the number of correct significant digits.

    Parameters
    ----------
    actual : array_like
        actual (computed) values
    desired : array_like
        desired (reference/certified) values

    Returns
    -------
    lre : ndarray or float
        number of significant digits; uses the relative absolute difference
        if desired is not zero, and the absolute difference if desired is
        zero.  A plain scalar is returned for scalar input.

    References
    ----------
    http://en.wikibooks.org/wiki/Statistics:Numerical_Methods/Numerical_Comparison_of_Statistical_Software#Measuring_Accuracy
    http://digilander.libero.it/foxes/StRD_Benchmarks/X_NIST_StRD_Benchmarks.htm

    Examples
    --------
    >>> log_relative_error([1.000001, 1.000001e-6, 1.000001e30], [1, 1e-6, 1e30])
    array([ 6.,  6.,  6.])
    >>> log_relative_error(1.000001e-300, 1e-300)
    6.0000000000043521
    >>> log_relative_error(0, 0)
    inf
    >>> log_relative_error(1.000001e-300, 0)
    299.99999956570571
    '''
    # force float dtype so integer inputs (e.g. log_relative_error(0, 0))
    # go through the same floating-point log10 path as the examples
    actual = np.atleast_1d(np.asarray(actual, dtype=float))
    desired = np.atleast_1d(np.asarray(desired, dtype=float))
    mask_zero = desired == 0
    dig = np.zeros(desired.shape)
    # where the reference value is exactly zero, fall back to the
    # log *absolute* error (relative error would divide by zero)
    dig[mask_zero] = -np.log10(np.abs(actual[mask_zero]))
    # elsewhere: log relative error
    dig[~mask_zero] = -np.log10(np.abs(actual[~mask_zero]
                                       - desired[~mask_zero])
                                / np.abs(desired[~mask_zero]))
    if np.size(dig) == 1:
        # unwrap a 1-element array to a plain scalar, matching scalar input
        dig = np.squeeze(dig)[()]
    return dig
# Raw benchmark data: whitespace-separated (y, x) pairs, one observation per
# pair of tokens (82 observations total).  NOTE(review): this looks like the
# NIST StRD "Filip" dataset (degree-10 polynomial benchmark, matching the
# certified B0..B10 coefficients used below) -- confirm against the NIST site.
ss='''\
0.8116 -6.860120914
0.9072 -4.324130045
0.9052 -4.358625055
0.9039 -4.358426747
0.8053 -6.955852379
0.8377 -6.661145254
0.8667 -6.355462942
0.8809 -6.118102026
0.7975 -7.115148017
0.8162 -6.815308569
0.8515 -6.519993057
0.8766 -6.204119983
0.8885 -5.853871964
0.8859 -6.109523091
0.8959 -5.79832982
0.8913 -5.482672118
0.8959 -5.171791386
0.8971 -4.851705903
0.9021 -4.517126416
0.909 -4.143573228
0.9139 -3.709075441
0.9199 -3.499489089
0.8692 -6.300769497
0.8872 -5.953504836
0.89 -5.642065153
0.891 -5.031376979
0.8977 -4.680685696
0.9035 -4.329846955
0.9078 -3.928486195
0.7675 -8.56735134
0.7705 -8.363211311
0.7713 -8.107682739
0.7736 -7.823908741
0.7775 -7.522878745
0.7841 -7.218819279
0.7971 -6.920818754
0.8329 -6.628932138
0.8641 -6.323946875
0.8804 -5.991399828
0.7668 -8.781464495
0.7633 -8.663140179
0.7678 -8.473531488
0.7697 -8.247337057
0.77 -7.971428747
0.7749 -7.676129393
0.7796 -7.352812702
0.7897 -7.072065318
0.8131 -6.774174009
0.8498 -6.478861916
0.8741 -6.159517513
0.8061 -6.835647144
0.846 -6.53165267
0.8751 -6.224098421
0.8856 -5.910094889
0.8919 -5.598599459
0.8934 -5.290645224
0.894 -4.974284616
0.8957 -4.64454848
0.9047 -4.290560426
0.9129 -3.885055584
0.9209 -3.408378962
0.9219 -3.13200249
0.7739 -8.726767166
0.7681 -8.66695597
0.7665 -8.511026475
0.7703 -8.165388579
0.7702 -7.886056648
0.7761 -7.588043762
0.7809 -7.283412422
0.7961 -6.995678626
0.8253 -6.691862621
0.8602 -6.392544977
0.8809 -6.067374056
0.8301 -6.684029655
0.8664 -6.378719832
0.8834 -6.065855188
0.8898 -5.752272167
0.8964 -5.132414673
0.8963 -4.811352704
0.9074 -4.098269308
0.9119 -3.66174277
0.9228 -3.2644011'''.split()
# tokens -> float array with one (y, x) row per observation
dta = np.array(ss, float).reshape(-1,2)
'''
Certified Regression Statistics
Standard Deviation
Parameter Estimate of Estimate
'''
# NIST certified reference values: one row per polynomial coefficient,
# columns are (name, certified estimate, certified std. dev. of estimate)
ss_params='''\
B0 -1467.48961422980 298.084530995537
B1 -2772.17959193342 559.779865474950
B2 -2316.37108160893 466.477572127796
B3 -1127.97394098372 227.204274477751
B4 -354.478233703349 71.6478660875927
B5 -75.1242017393757 15.2897178747400
B6 -10.8753180355343 2.23691159816033
B7 -1.06221498588947 0.221624321934227
B8 -0.670191154593408E-01 0.142363763154724E-01
B9 -0.246781078275479E-02 0.535617408889821E-03
B10 -0.402962525080404E-04 0.896632837373868E-05'''.split()
# drop the name column; keep (estimate, bse) as a float array, one row per B_k
params_nist = np.array(ss_params).reshape(-1,3)[:,1:].astype(float)
# ---------------------------------------------------------------------------
# Fit the degree-10 polynomial three ways (raw OLS, OLS on a rescaled design
# matrix, numpy.polyfit) and report the coefficient accuracy, in correct
# significant digits, against the NIST certified values.
# Fixed: Python 2 `print` statements replaced by print() calls so the script
# runs on Python 3; output text is unchanged.
# ---------------------------------------------------------------------------
endog = dta[:, 0]
# design matrix: columns x**0 .. x**10 (the (n,1) column broadcasts against
# the exponent row vector)
exog0 = dta[:, 1:]**np.arange(11)
res0 = sm.OLS(endog, exog0).fit()
print('\nresults original, params, bse')
print(res0.params)
print(res0.bse)
# rescale the whole design matrix by one scalar, max(|x|), to improve
# conditioning; NOTE this is not a per-column scaling (commented variant below)
exog1 = exog0 / np.max(np.abs(dta[:, 1]), 0)  # rescaled
#exog1 = (dta[:,1:] / np.abs(dta[:,1].min()))**np.arange(11) #rescaled
res1 = sm.OLS(endog, exog1).fit()
print('\nresults rescaled, params, bse (not scaled back)')
print(res1.params)
print(res1.bse)
print('residual standard deviation')
# estimated residual std. dev. next to the certified value for comparison
print(np.sqrt(res1.mse_resid), 0.334801051324544E-02)
#print(np.linalg.eigvalsh(res1.normalized_cov_params))
# polyfit returns highest power first; reverse to match NIST B0..B10 order
pp = np.polyfit(dta[:, 1], dta[:, 0], 10)[::-1]
# undo the scalar rescaling so the parameters are comparable to certified ones
rp = res1.params / np.max(np.abs(dta[:, 1]), 0)
#rp = res1.params / np.abs(dta[:,1].min())**np.arange(11)
print('lre sm original')
print(log_relative_error(res0.params, params_nist[:, 0]))
print('lre sm rescaled')
print(log_relative_error(rp, params_nist[:, 0]))
print('lre polyfit')
print(log_relative_error(pp, params_nist[:, 0]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment