Created
March 4, 2012 17:43
-
-
Save josef-pkt/1974049 to your computer and use it in GitHub Desktop.
Checking numerical accuracy of linear regression against NIST StRD certified values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
"""Checking numerical Accuracy in Linear Regression | |
Created on Sat Feb 25 20:36:50 2012 | |
Author: Josef Perktold | |
License: BSD-3 | |
""" | |
import numpy as np | |
import scikits.statsmodels.api as sm | |
def log_relative_error(actual, desired):
    '''Calculate log relative error (LRE), the number of correct significant digits.

    Parameters
    ----------
    actual : array_like
        actual (computed) values
    desired : array_like
        desired (reference/certified) values

    Returns
    -------
    lre : ndarray or float
        number of significant digits; uses the relative absolute difference
        if desired is not zero, and the absolute difference if desired is
        zero.  A plain scalar is returned for scalar input.

    References
    ----------
    http://en.wikibooks.org/wiki/Statistics:Numerical_Methods/Numerical_Comparison_of_Statistical_Software#Measuring_Accuracy
    http://digilander.libero.it/foxes/StRD_Benchmarks/X_NIST_StRD_Benchmarks.htm

    Examples
    --------
    >>> log_relative_error([1.000001, 1.000001e-6, 1.000001e30], [1, 1e-6, 1e30])
    array([ 6.,  6.,  6.])
    >>> log_relative_error(1.000001e-300, 1e-300)
    6.0000000000043521
    >>> log_relative_error(0, 0)
    inf
    >>> log_relative_error(1.000001e-300, 0)
    299.99999956570571
    '''
    # force float dtype so integer inputs (e.g. log_relative_error(0, 0))
    # go through the same floating-point log10 path as the examples
    actual = np.atleast_1d(np.asarray(actual, dtype=float))
    desired = np.atleast_1d(np.asarray(desired, dtype=float))
    mask_zero = desired == 0
    dig = np.zeros(desired.shape)
    # where the reference value is exactly zero, fall back to the
    # log *absolute* error (relative error would divide by zero)
    dig[mask_zero] = -np.log10(np.abs(actual[mask_zero]))
    # elsewhere: log relative error
    dig[~mask_zero] = -np.log10(np.abs(actual[~mask_zero]
                                       - desired[~mask_zero])
                                / np.abs(desired[~mask_zero]))
    if np.size(dig) == 1:
        # unwrap a 1-element array to a plain scalar, matching scalar input
        dig = np.squeeze(dig)[()]
    return dig
# Raw benchmark data: whitespace-separated (y, x) pairs, one observation per
# pair of tokens (82 observations total).  NOTE(review): this looks like the
# NIST StRD "Filip" dataset (degree-10 polynomial benchmark, matching the
# certified B0..B10 coefficients used below) -- confirm against the NIST site.
ss='''\
0.8116 -6.860120914
0.9072 -4.324130045
0.9052 -4.358625055
0.9039 -4.358426747
0.8053 -6.955852379
0.8377 -6.661145254
0.8667 -6.355462942
0.8809 -6.118102026
0.7975 -7.115148017
0.8162 -6.815308569
0.8515 -6.519993057
0.8766 -6.204119983
0.8885 -5.853871964
0.8859 -6.109523091
0.8959 -5.79832982
0.8913 -5.482672118
0.8959 -5.171791386
0.8971 -4.851705903
0.9021 -4.517126416
0.909 -4.143573228
0.9139 -3.709075441
0.9199 -3.499489089
0.8692 -6.300769497
0.8872 -5.953504836
0.89 -5.642065153
0.891 -5.031376979
0.8977 -4.680685696
0.9035 -4.329846955
0.9078 -3.928486195
0.7675 -8.56735134
0.7705 -8.363211311
0.7713 -8.107682739
0.7736 -7.823908741
0.7775 -7.522878745
0.7841 -7.218819279
0.7971 -6.920818754
0.8329 -6.628932138
0.8641 -6.323946875
0.8804 -5.991399828
0.7668 -8.781464495
0.7633 -8.663140179
0.7678 -8.473531488
0.7697 -8.247337057
0.77 -7.971428747
0.7749 -7.676129393
0.7796 -7.352812702
0.7897 -7.072065318
0.8131 -6.774174009
0.8498 -6.478861916
0.8741 -6.159517513
0.8061 -6.835647144
0.846 -6.53165267
0.8751 -6.224098421
0.8856 -5.910094889
0.8919 -5.598599459
0.8934 -5.290645224
0.894 -4.974284616
0.8957 -4.64454848
0.9047 -4.290560426
0.9129 -3.885055584
0.9209 -3.408378962
0.9219 -3.13200249
0.7739 -8.726767166
0.7681 -8.66695597
0.7665 -8.511026475
0.7703 -8.165388579
0.7702 -7.886056648
0.7761 -7.588043762
0.7809 -7.283412422
0.7961 -6.995678626
0.8253 -6.691862621
0.8602 -6.392544977
0.8809 -6.067374056
0.8301 -6.684029655
0.8664 -6.378719832
0.8834 -6.065855188
0.8898 -5.752272167
0.8964 -5.132414673
0.8963 -4.811352704
0.9074 -4.098269308
0.9119 -3.66174277
0.9228 -3.2644011'''.split()
# tokens -> float array with one (y, x) row per observation
dta = np.array(ss, float).reshape(-1,2)
'''
Certified Regression Statistics
Standard Deviation
Parameter Estimate of Estimate
'''
# NIST certified reference values: one row per polynomial coefficient,
# columns are (name, certified estimate, certified std. dev. of estimate)
ss_params='''\
B0 -1467.48961422980 298.084530995537
B1 -2772.17959193342 559.779865474950
B2 -2316.37108160893 466.477572127796
B3 -1127.97394098372 227.204274477751
B4 -354.478233703349 71.6478660875927
B5 -75.1242017393757 15.2897178747400
B6 -10.8753180355343 2.23691159816033
B7 -1.06221498588947 0.221624321934227
B8 -0.670191154593408E-01 0.142363763154724E-01
B9 -0.246781078275479E-02 0.535617408889821E-03
B10 -0.402962525080404E-04 0.896632837373868E-05'''.split()
# drop the name column; keep (estimate, bse) as a float array, one row per B_k
params_nist = np.array(ss_params).reshape(-1,3)[:,1:].astype(float)
# ---------------------------------------------------------------------------
# Fit the degree-10 polynomial three ways (raw OLS, OLS on a rescaled design
# matrix, numpy.polyfit) and report the coefficient accuracy, in correct
# significant digits, against the NIST certified values.
# Fixed: Python 2 `print` statements replaced by print() calls so the script
# runs on Python 3; output text is unchanged.
# ---------------------------------------------------------------------------
endog = dta[:, 0]
# design matrix: columns x**0 .. x**10 (the (n,1) column broadcasts against
# the exponent row vector)
exog0 = dta[:, 1:]**np.arange(11)
res0 = sm.OLS(endog, exog0).fit()
print('\nresults original, params, bse')
print(res0.params)
print(res0.bse)
# rescale the whole design matrix by one scalar, max(|x|), to improve
# conditioning; NOTE this is not a per-column scaling (commented variant below)
exog1 = exog0 / np.max(np.abs(dta[:, 1]), 0)  # rescaled
#exog1 = (dta[:,1:] / np.abs(dta[:,1].min()))**np.arange(11) #rescaled
res1 = sm.OLS(endog, exog1).fit()
print('\nresults rescaled, params, bse (not scaled back)')
print(res1.params)
print(res1.bse)
print('residual standard deviation')
# estimated residual std. dev. next to the certified value for comparison
print(np.sqrt(res1.mse_resid), 0.334801051324544E-02)
#print(np.linalg.eigvalsh(res1.normalized_cov_params))
# polyfit returns highest power first; reverse to match NIST B0..B10 order
pp = np.polyfit(dta[:, 1], dta[:, 0], 10)[::-1]
# undo the scalar rescaling so the parameters are comparable to certified ones
rp = res1.params / np.max(np.abs(dta[:, 1]), 0)
#rp = res1.params / np.abs(dta[:,1].min())**np.arange(11)
print('lre sm original')
print(log_relative_error(res0.params, params_nist[:, 0]))
print('lre sm rescaled')
print(log_relative_error(rp, params_nist[:, 0]))
print('lre polyfit')
print(log_relative_error(pp, params_nist[:, 0]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment