Create a gist now

Instantly share code, notes, and snippets.

checking numerical accuracy
# -*- coding: utf-8 -*-
"""Checking numerical Accuracy in Linear Regression
Created on Sat Feb 25 20:36:50 2012
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
import scikits.statsmodels.api as sm
def log_relative_error(actual, desired):
    """Calculate the log relative error (LRE): number of correct significant digits.

    Parameters
    ----------
    actual : array_like
        Computed values.
    desired : array_like
        Reference ("certified") values, same shape as ``actual``.

    Returns
    -------
    lre : ndarray or float
        Number of agreeing significant digits.  Uses the relative absolute
        difference where ``desired`` is nonzero and the absolute difference
        where ``desired`` is zero.  Scalar input yields a plain scalar.

    References
    ----------
    http://en.wikibooks.org/wiki/Statistics:Numerical_Methods/Numerical_Comparison_of_Statistical_Software#Measuring_Accuracy
    http://digilander.libero.it/foxes/StRD_Benchmarks/X_NIST_StRD_Benchmarks.htm

    Examples
    --------
    >>> log_relative_error([1.000001, 1.000001e-6, 1.000001e30], [1, 1e-6, 1e30])
    array([ 6.,  6.,  6.])
    >>> log_relative_error(1.000001e-300, 1e-300)
    6.0000000000043521
    >>> log_relative_error(0, 0)
    inf
    >>> log_relative_error(1.000001e-300, 0)
    299.99999956570571
    """
    # atleast_1d already converts array_like to ndarray; no extra asarray needed.
    actual = np.atleast_1d(actual)
    desired = np.atleast_1d(desired)
    mask_zero = desired == 0
    dig = np.zeros(desired.shape)
    # Exact agreement legitimately produces log10(0) -> -inf (i.e. LRE = inf);
    # suppress the divide-by-zero RuntimeWarning, results are unchanged.
    with np.errstate(divide='ignore'):
        # desired == 0: fall back to the absolute error of `actual`
        dig[mask_zero] = -np.log10(np.abs(actual[mask_zero]))
        # desired != 0: relative absolute error w.r.t. `desired`
        dig[~mask_zero] = -np.log10(np.abs(actual[~mask_zero]
                                           - desired[~mask_zero])
                                    / np.abs(desired[~mask_zero]))
    # Collapse a single-element result back to a Python scalar.
    if np.size(dig) == 1:
        dig = np.squeeze(dig)[()]
    return dig
# Inline dataset: whitespace-separated (y, x) pairs, one observation per line.
# 82 observations with a degree-10 polynomial fit below -- presumably the
# NIST StRD "Filip" certified dataset; TODO confirm against the NIST page.
ss='''\
0.8116 -6.860120914
0.9072 -4.324130045
0.9052 -4.358625055
0.9039 -4.358426747
0.8053 -6.955852379
0.8377 -6.661145254
0.8667 -6.355462942
0.8809 -6.118102026
0.7975 -7.115148017
0.8162 -6.815308569
0.8515 -6.519993057
0.8766 -6.204119983
0.8885 -5.853871964
0.8859 -6.109523091
0.8959 -5.79832982
0.8913 -5.482672118
0.8959 -5.171791386
0.8971 -4.851705903
0.9021 -4.517126416
0.909 -4.143573228
0.9139 -3.709075441
0.9199 -3.499489089
0.8692 -6.300769497
0.8872 -5.953504836
0.89 -5.642065153
0.891 -5.031376979
0.8977 -4.680685696
0.9035 -4.329846955
0.9078 -3.928486195
0.7675 -8.56735134
0.7705 -8.363211311
0.7713 -8.107682739
0.7736 -7.823908741
0.7775 -7.522878745
0.7841 -7.218819279
0.7971 -6.920818754
0.8329 -6.628932138
0.8641 -6.323946875
0.8804 -5.991399828
0.7668 -8.781464495
0.7633 -8.663140179
0.7678 -8.473531488
0.7697 -8.247337057
0.77 -7.971428747
0.7749 -7.676129393
0.7796 -7.352812702
0.7897 -7.072065318
0.8131 -6.774174009
0.8498 -6.478861916
0.8741 -6.159517513
0.8061 -6.835647144
0.846 -6.53165267
0.8751 -6.224098421
0.8856 -5.910094889
0.8919 -5.598599459
0.8934 -5.290645224
0.894 -4.974284616
0.8957 -4.64454848
0.9047 -4.290560426
0.9129 -3.885055584
0.9209 -3.408378962
0.9219 -3.13200249
0.7739 -8.726767166
0.7681 -8.66695597
0.7665 -8.511026475
0.7703 -8.165388579
0.7702 -7.886056648
0.7761 -7.588043762
0.7809 -7.283412422
0.7961 -6.995678626
0.8253 -6.691862621
0.8602 -6.392544977
0.8809 -6.067374056
0.8301 -6.684029655
0.8664 -6.378719832
0.8834 -6.065855188
0.8898 -5.752272167
0.8964 -5.132414673
0.8963 -4.811352704
0.9074 -4.098269308
0.9119 -3.66174277
0.9228 -3.2644011'''.split()
# Reshape the flat token list into an (n, 2) float array: column 0 is the
# response y, column 1 is the regressor x.
dta = np.array(ss, float).reshape(-1,2)
# The bare string below is a no-op documentation blob describing the layout
# of the certified-values table that follows.
'''
Certified Regression Statistics
Standard Deviation
Parameter Estimate of Estimate
'''
# Certified coefficients B0..B10 with their standard errors -- presumably the
# NIST certified values for this benchmark; TODO confirm against the source.
ss_params='''\
B0 -1467.48961422980 298.084530995537
B1 -2772.17959193342 559.779865474950
B2 -2316.37108160893 466.477572127796
B3 -1127.97394098372 227.204274477751
B4 -354.478233703349 71.6478660875927
B5 -75.1242017393757 15.2897178747400
B6 -10.8753180355343 2.23691159816033
B7 -1.06221498588947 0.221624321934227
B8 -0.670191154593408E-01 0.142363763154724E-01
B9 -0.246781078275479E-02 0.535617408889821E-03
B10 -0.402962525080404E-04 0.896632837373868E-05'''.split()
# Drop the "B<k>" label column; result is an (11, 2) float array of
# [certified estimate, certified standard error] rows.
params_nist = np.array(ss_params).reshape(-1,3)[:,1:].astype(float)
# ----- Fit the degree-10 polynomial and compare against certified values -----
# NOTE: Python 2 print statements throughout; this script predates Python 3.
endog = dta[:,0]
# Vandermonde-style design matrix: the (n,1) regressor column broadcast
# against np.arange(11) gives columns x**0 .. x**10.
exog0 = dta[:,1:]**np.arange(11)
res0 = sm.OLS(endog, exog0).fit()
print '\nresults original, params, bse'
print res0.params
print res0.bse
# Rescale the whole design matrix by the single scalar max |x|.
# NOTE(review): dividing every column by the same scalar leaves the matrix
# condition number unchanged; the commented alternative below rescales per
# column, which is the variant that would actually affect conditioning.
exog1 = exog0/np.max(np.abs(dta[:,1]), 0) #rescaled
#exog1 = (dta[:,1:] / np.abs(dta[:,1].min()))**np.arange(11) #rescaled
res1 = sm.OLS(endog, exog1).fit()
print '\nresults rescaled, params, bse (not scaled back)'
print res1.params
print res1.bse
print 'residual standard deviation'
# The literal is presumably the certified residual standard deviation for
# this benchmark -- confirm against the certified statistics table.
print np.sqrt(res1.mse_resid), 0.334801051324544E-02
#print np.linalg.eigvalsh(res1.normalized_cov_params)
# Reference fit via polyfit; [::-1] flips coefficients to ascending degree
# so they line up with the B0..B10 ordering of params_nist.
pp = np.polyfit(dta[:,1],dta[:,0],10)[::-1]
# Undo the scalar rescaling so the parameters are comparable to certified ones
# (exog1 = exog0/c implies params1 = c*params0, hence params0 = params1/c).
rp = res1.params / np.max(np.abs(dta[:,1]), 0)
#rp = res1.params / np.abs(dta[:,1].min())**np.arange(11)
# Report agreement (in significant digits) with the certified coefficients.
print 'lre sm original'
print log_relative_error(res0.params, params_nist[:,0])
print 'lre sm rescaled'
print log_relative_error(rp, params_nist[:,0])
print 'lre polyfit'
print log_relative_error(pp, params_nist[:,0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment