@ericqu · Created November 22, 2022
Quick analysis of the error in the estimated coefficients of a linear regression, contrasted with the condition number of the design matrix.
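The script fits each of the eleven NIST StRD linear-regression datasets (Norris, Pontius, NoInt1, NoInt2, Filip, Longley, Wampler 1-5) three ways: with LinearRegressionKit's `regress`, with `GLM.lm`, and with Julia's backslash operator (`X \ y`, a QR-based least-squares solve). For each fit it computes the mean absolute error of the estimated coefficients against the NIST certified values, records the condition number reported by LinearRegressionKit, and finally plots and regresses the error against the condition number.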
using LinearRegressionKit, StatsModels, DataFrames, CSV
using GLM, Statistics
using CategoricalArrays, DataFramesMeta
using VegaLite
# versioninfo()
# import Pkg
# Pkg.status()
# Julia Version 1.8.3
# Commit 0434deb161e (2022-11-14 20:14 UTC)
# Platform Info:
# OS: macOS (arm64-apple-darwin21.3.0)
# CPU: 8 × Apple M1
# WORD_SIZE: 64
# LIBM: libopenlibm
# LLVM: libLLVM-13.0.1 (ORCJIT, apple-m1)
# Threads: 1 on 4 virtual cores
# Environment:
# JULIA_EDITOR = code
# JULIA_NUM_THREADS =
# Status `~/.julia/environments/v1.8/Project.toml`
# [336ed68f] CSV v0.10.7
# [324d7699] CategoricalArrays v0.10.7
# ⌅ [a93c6f00] DataFrames v1.2.2
# [1313f7d8] DataFramesMeta v0.12.0
# [38e38edf] GLM v1.8.1
# [e91d531d] LinearRegressionKit v0.7.8
# [3eaba693] StatsModels v0.6.33
# [112f6efa] VegaLite v2.6.0
# absolute error between a certified value `a` and an estimate `b`; note that
# with isapprox's default tolerances, `≈ 0.` only fires when the difference is
# exactly zero, so in practice this returns abs(a-b)
delta(a,b) = abs(a-b) ≈ 0. ? 0. : abs(a-b)
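# for example (illustrative only):
#   delta(1.0, 1.0)         # 0.0 (exact match)
#   delta(1.0, 1.0 + 1e-12) # ≈ 1.0e-12 (the error itself)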
# Norris
y = [0.1, 338.8, 118.1, 888.0, 9.2, 228.1, 668.5, 998.5, 449.1, 778.9, 559.2, 0.3, 0.1, 778.1, 668.8, 339.3, 448.9, 10.8, 557.7, 228.3, 998.0, 888.8, 119.6, 0.3, 0.6, 557.6, 339.3, 888.0, 998.5, 778.9, 10.2, 117.6, 228.9, 668.4, 449.2, 0.2]
x = [0.2, 337.4, 118.2, 884.6, 10.1, 226.5, 666.3, 996.3, 448.6, 777.0, 558.2, 0.4, 0.6, 775.5, 666.9, 338.0, 447.5, 11.6, 556.0, 228.1, 995.8, 887.6, 120.2, 0.3, 0.3, 556.8, 339.1, 887.2, 999.0, 779.0, 11.1, 118.3, 229.2, 669.1, 448.9, 0.5]
df = DataFrame(x= x, y= y)
f = @formula(y ~ x)
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
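# For a tall dense matrix, Julia's `\` solves the least-squares problem via a
# pivoted QR factorization, i.e. roughly equivalent to:
#   using LinearAlgebra
#   F = qr(X, ColumnNorm()); F \ y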
norris_cond = lrk.cond
norris_mce_lrk = mean([delta(-0.262323073774029, lrk.coefs[1]),
delta(1.00211681802045, lrk.coefs[2])])
norris_mce_glm = mean([delta(-0.262323073774029, coef(lr)[1]),
delta(1.00211681802045, coef(lr)[2]) ])
norris_mce_lrqr = mean([delta(-0.262323073774029, lrqr[1]),
delta(1.00211681802045, lrqr[2]) ])
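# The mean([delta(...), ...]) blocks below all repeat the same pattern; a small
# helper would collapse them (a sketch, not used here so each dataset block
# stays self-contained and copy-pastable):
#   mce(certified, estimated) = mean(delta.(certified, estimated))
#   norris_mce_lrk == mce([-0.262323073774029, 1.00211681802045], lrk.coefs)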
# Pontius
y = [0.11019, 0.21956, 0.32949, 0.43899, 0.54803, 0.65694, 0.76562, 0.87487, 0.98292, 1.09146, 1.20001, 1.30822, 1.41599, 1.52399, 1.63194, 1.73947, 1.84646, 1.95392, 2.06128, 2.16844, 0.11052, 0.22018, 0.32939, 0.43886, 0.54798, 0.65739, 0.76596, 0.87474, 0.983, 1.0915, 1.20004, 1.30818, 1.41613, 1.52408, 1.63159, 1.73965, 1.84696, 1.95445, 2.06177, 2.16829]
x = [150000, 300000, 450000, 600000, 750000, 900000, 1050000, 1200000, 1350000, 1500000, 1650000, 1800000, 1950000, 2100000, 2250000, 2400000, 2550000, 2700000, 2850000, 3000000, 150000, 300000, 450000, 600000, 750000, 900000, 1050000, 1200000, 1350000, 1500000, 1650000, 1800000, 1950000, 2100000, 2250000, 2400000, 2550000, 2700000, 2850000, 3000000]
df = DataFrame(x= x, y= y)
f = @formula(y ~ x + x^2)
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
pontius_cond = lrk.cond
pontius_mce_lrk = mean([delta(0.673565789473684E-03, lrk.coefs[1]),
delta(0.732059160401003E-06, lrk.coefs[2]),
delta(-0.316081871345029E-14, lrk.coefs[3]),
])
pontius_mce_glm = mean([delta(0.673565789473684E-03, coef(lr)[1]),
delta(0.732059160401003E-06, coef(lr)[2]),
delta(-0.316081871345029E-14, coef(lr)[3]),
])
pontius_mce_lrqr = mean([delta(0.673565789473684E-03, lrqr[1]),
delta(0.732059160401003E-06, lrqr[2]),
delta(-0.316081871345029E-14, lrqr[3]),
])
# No Int 1
y = [130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140]
x = [60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
df = DataFrame(x= x, y= y)
f = @formula(y ~ 0 + x )
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
noint1_cond = lrk.cond
noint1_mce_lrk = mean([delta(2.07438016528926, lrk.coefs[1])])
noint1_mce_glm = mean([delta(2.07438016528926, coef(lr)[1])])
noint1_mce_lrqr = mean([delta(2.07438016528926, lrqr[1])])
# No Int 2
y = [3, 4, 4]
x = [4, 5, 6]
df = DataFrame(x= x, y= y)
f = @formula(y ~ 0 + x )
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
noint2_cond = lrk.cond
noint2_mce_lrk = mean([delta(0.727272727272727, lrk.coefs[1])])
noint2_mce_glm = mean([delta(0.727272727272727, coef(lr)[1])])
noint2_mce_lrqr = mean([delta(0.727272727272727, lrqr[1])])
# Filip
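# (the hardest of the NIST StRD linear-regression cases: a degree-10 polynomial
# fit whose design matrix is severely ill-conditioned)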
y = [0.8116, 0.9072, 0.9052, 0.9039, 0.8053, 0.8377, 0.8667, 0.8809, 0.7975, 0.8162, 0.8515, 0.8766, 0.8885, 0.8859, 0.8959, 0.8913, 0.8959, 0.8971, 0.9021, 0.909, 0.9139, 0.9199, 0.8692, 0.8872, 0.89, 0.891, 0.8977, 0.9035, 0.9078, 0.7675, 0.7705, 0.7713, 0.7736, 0.7775, 0.7841, 0.7971, 0.8329, 0.8641, 0.8804, 0.7668, 0.7633, 0.7678, 0.7697, 0.77, 0.7749, 0.7796, 0.7897, 0.8131, 0.8498, 0.8741, 0.8061, 0.846, 0.8751, 0.8856, 0.8919, 0.8934, 0.894, 0.8957, 0.9047, 0.9129, 0.9209, 0.9219, 0.7739, 0.7681, 0.7665, 0.7703, 0.7702, 0.7761, 0.7809, 0.7961, 0.8253, 0.8602, 0.8809, 0.8301, 0.8664, 0.8834, 0.8898, 0.8964, 0.8963, 0.9074, 0.9119, 0.9228]
x = [-6.860120914, -4.324130045, -4.358625055, -4.358426747, -6.955852379, -6.661145254, -6.355462942, -6.118102026, -7.115148017, -6.815308569, -6.519993057, -6.204119983, -5.853871964, -6.109523091, -5.79832982, -5.482672118, -5.171791386, -4.851705903, -4.517126416, -4.143573228, -3.709075441, -3.499489089, -6.300769497, -5.953504836, -5.642065153, -5.031376979, -4.680685696, -4.329846955, -3.928486195, -8.56735134, -8.363211311, -8.107682739, -7.823908741, -7.522878745, -7.218819279, -6.920818754, -6.628932138, -6.323946875, -5.991399828, -8.781464495, -8.663140179, -8.473531488, -8.247337057, -7.971428747, -7.676129393, -7.352812702, -7.072065318, -6.774174009, -6.478861916, -6.159517513, -6.835647144, -6.53165267, -6.224098421, -5.910094889, -5.598599459, -5.290645224, -4.974284616, -4.64454848, -4.290560426, -3.885055584, -3.408378962, -3.13200249, -8.726767166, -8.66695597, -8.511026475, -8.165388579, -7.886056648, -7.588043762, -7.283412422, -6.995678626, -6.691862621, -6.392544977, -6.067374056, -6.684029655, -6.378719832, -6.065855188, -5.752272167, -5.132414673, -4.811352704, -4.098269308, -3.66174277, -3.2644011]
df = DataFrame(x= x, y= y)
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5 + x^6 + x^7 + x^8 + x^9 + x^10)
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
filip_cond = lrk.cond
filip_mce_lrk = mean([delta(-1467.48961422980, lrk.coefs[1]),
delta(-2772.17959193342, lrk.coefs[2]),
delta(-2316.37108160893, lrk.coefs[3]),
delta(-1127.97394098372, lrk.coefs[4]),
delta(-354.478233703349, lrk.coefs[5]),
delta(-75.1242017393757, lrk.coefs[6]),
delta(-10.8753180355343, lrk.coefs[7]),
delta(-1.06221498588947, lrk.coefs[8]),
delta(-0.670191154593408E-01, lrk.coefs[9]),
delta(-0.246781078275479E-02, lrk.coefs[10]),
delta(-0.402962525080404E-04, lrk.coefs[11]),
])
filip_mce_glm = mean([delta(-1467.48961422980, coef(lr)[1]),
delta(-2772.17959193342, coef(lr)[2]),
delta(-2316.37108160893, coef(lr)[3]),
delta(-1127.97394098372, coef(lr)[4]),
delta(-354.478233703349, coef(lr)[5]),
delta(-75.1242017393757, coef(lr)[6]),
delta(-10.8753180355343, coef(lr)[7]),
delta(-1.06221498588947, coef(lr)[8]),
delta(-0.670191154593408E-01, coef(lr)[9]),
delta(-0.246781078275479E-02, coef(lr)[10]),
delta(-0.402962525080404E-04, coef(lr)[11]),
])
filip_mce_lrqr = mean([delta(-1467.48961422980, lrqr[1]),
delta(-2772.17959193342, lrqr[2]),
delta(-2316.37108160893, lrqr[3]),
delta(-1127.97394098372, lrqr[4]),
delta(-354.478233703349, lrqr[5]),
delta(-75.1242017393757, lrqr[6]),
delta(-10.8753180355343, lrqr[7]),
delta(-1.06221498588947, lrqr[8]),
delta(-0.670191154593408E-01, lrqr[9]),
delta(-0.246781078275479E-02, lrqr[10]),
delta(-0.402962525080404E-04, lrqr[11]),
])
# Longley
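# (the classic multicollinearity benchmark: six highly correlated macroeconomic predictors)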
y = [60323, 61122, 60171, 61187, 63221, 63639, 64989, 63761, 66019, 67857, 68169, 66513, 68655, 69564, 69331, 70551]
x1 = [83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]
x2 = [234289, 259426, 258054, 284599, 328975, 346999, 365385, 363112, 397469, 419180, 442769, 444546, 482704, 502601, 518173, 554894]
x3 = [2356, 2325, 3682, 3351, 2099, 1932, 1870, 3578, 2904, 2822, 2936, 4681, 3813, 3931, 4806, 4007]
x4 = [1590, 1456, 1616, 1650, 3099, 3594, 3547, 3350, 3048, 2857, 2798, 2637, 2552, 2514, 2572, 2827]
x5 = [107608, 108632, 109773, 110929, 112075, 113270, 115094, 116219, 117388, 118734, 120445, 121950, 123366, 125368, 127852, 130081]
x6 = [1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962]
df = DataFrame(y = y, x1 = x1, x2 = x2, x3 = x3, x4 = x4, x5 = x5, x6 = x6)
f = @formula(y ~ x1 + x2 + x3 + x4 + x5 + x6)
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
longley_cond = lrk.cond
longley_mce_lrk = mean([delta(-3482258.63459582, lrk.coefs[1]),
delta(15.0618722713733, lrk.coefs[2]),
delta(-0.358191792925910E-01, lrk.coefs[3]),
delta(-2.02022980381683, lrk.coefs[4]),
delta(-1.03322686717359, lrk.coefs[5]),
delta(-0.511041056535807E-01, lrk.coefs[6]),
delta(1829.15146461355, lrk.coefs[7]),
])
longley_mce_glm = mean([delta(-3482258.63459582, coef(lr)[1]),
delta(15.0618722713733, coef(lr)[2]),
delta(-0.358191792925910E-01, coef(lr)[3]),
delta(-2.02022980381683, coef(lr)[4]),
delta(-1.03322686717359, coef(lr)[5]),
delta(-0.511041056535807E-01, coef(lr)[6]),
delta(1829.15146461355, coef(lr)[7]),
])
longley_mce_lrqr = mean([delta(-3482258.63459582, lrqr[1]),
delta(15.0618722713733, lrqr[2]),
delta(-0.358191792925910E-01, lrqr[3]),
delta(-2.02022980381683, lrqr[4]),
delta(-1.03322686717359, lrqr[5]),
delta(-0.511041056535807E-01, lrqr[6]),
delta(1829.15146461355, lrqr[7]),
])
# Wampler 1
y = [1, 6, 63, 364, 1365, 3906, 9331, 19608, 37449, 66430, 111111, 177156, 271453, 402234, 579195, 813616, 1118481, 1508598, 2000719, 2613660, 3368421]
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
df = DataFrame(y = y, x = x)
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5 )
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
wampler1_cond = lrk.cond
wampler1_mce_lrk = mean([delta(1., lrk.coefs[1]),
delta(1., lrk.coefs[2]),
delta(1., lrk.coefs[3]),
delta(1., lrk.coefs[4]),
delta(1., lrk.coefs[5]),
delta(1., lrk.coefs[6]),
])
wampler1_mce_glm = mean([delta(1., coef(lr)[1]),
delta(1., coef(lr)[2]),
delta(1., coef(lr)[3]),
delta(1., coef(lr)[4]),
delta(1., coef(lr)[5]),
delta(1., coef(lr)[6]),
])
wampler1_mce_lrqr = mean([delta(1., lrqr[1]),
delta(1., lrqr[2]),
delta(1., lrqr[3]),
delta(1., lrqr[4]),
delta(1., lrqr[5]),
delta(1., lrqr[6]),
])
# Wampler 2
y = [1.0, 1.11111, 1.24992, 1.42753, 1.65984, 1.96875, 2.38336, 2.94117, 3.68928, 4.68559, 6.0, 7.71561, 9.92992, 12.75603, 16.32384, 20.78125, 26.29536, 33.05367, 41.26528, 51.16209, 63.0]
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
df = DataFrame(y = y, x = x)
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5)
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
wampler2_cond = lrk.cond
wampler2_mce_lrk = mean([delta(1., lrk.coefs[1]),
delta(0.1, lrk.coefs[2]),
delta(0.100000000000000E-01, lrk.coefs[3]),
delta(0.100000000000000E-02, lrk.coefs[4]),
delta(0.100000000000000E-03, lrk.coefs[5]),
delta(0.100000000000000E-04, lrk.coefs[6]),
])
wampler2_mce_glm = mean([delta(1., coef(lr)[1]),
delta(0.1, coef(lr)[2]),
delta(0.100000000000000E-01, coef(lr)[3]),
delta(0.100000000000000E-02, coef(lr)[4]),
delta(0.100000000000000E-03, coef(lr)[5]),
delta(0.100000000000000E-04, coef(lr)[6]),
])
wampler2_mce_lrqr = mean([delta(1., lrqr[1]),
delta(0.1, lrqr[2]),
delta(0.100000000000000E-01, lrqr[3]),
delta(0.100000000000000E-02, lrqr[4]),
delta(0.100000000000000E-03, lrqr[5]),
delta(0.100000000000000E-04, lrqr[6]),
])
# Wampler 3
y = [760.0, -2042.0, 2111.0, -1684.0, 3888.0, 1858.0, 11379.0, 17560.0, 39287.0, 64382.0, 113159.0, 175108.0, 273291.0, 400186.0, 581243.0, 811568.0, 1.121004e6, 1.50655e6, 2.002767e6, 2.611612e6, 3.36918e6]
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
df = DataFrame(y = y, x = x)
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5)
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
wampler3_cond = lrk.cond
wampler3_mce_lrk = mean([delta(1., lrk.coefs[1]),
delta(1., lrk.coefs[2]),
delta(1., lrk.coefs[3]),
delta(1., lrk.coefs[4]),
delta(1., lrk.coefs[5]),
delta(1., lrk.coefs[6]),
])
wampler3_mce_glm = mean([delta(1., coef(lr)[1]),
delta(1., coef(lr)[2]),
delta(1., coef(lr)[3]),
delta(1., coef(lr)[4]),
delta(1., coef(lr)[5]),
delta(1., coef(lr)[6]),
])
wampler3_mce_lrqr = mean([delta(1., lrqr[1]),
delta(1., lrqr[2]),
delta(1., lrqr[3]),
delta(1., lrqr[4]),
delta(1., lrqr[5]),
delta(1., lrqr[6]),
])
# Wampler 4
y = [75901, -204794, 204863, -204436, 253665, -200894, 214131, -185192, 221249, -138370, 315911, -27644, 455253, 197434, 783995, 608816, 1370781, 1303798, 2205519, 2408860, 3444321]
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
df = DataFrame(y = y, x = x)
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5)
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
wampler4_cond = lrk.cond
wampler4_mce_lrk = mean([delta(1., lrk.coefs[1]),
delta(1., lrk.coefs[2]),
delta(1., lrk.coefs[3]),
delta(1., lrk.coefs[4]),
delta(1., lrk.coefs[5]),
delta(1., lrk.coefs[6]),
])
wampler4_mce_glm = mean([delta(1., coef(lr)[1]),
delta(1., coef(lr)[2]),
delta(1., coef(lr)[3]),
delta(1., coef(lr)[4]),
delta(1., coef(lr)[5]),
delta(1., coef(lr)[6]),
])
wampler4_mce_lrqr = mean([delta(1., lrqr[1]),
delta(1., lrqr[2]),
delta(1., lrqr[3]),
delta(1., lrqr[4]),
delta(1., lrqr[5]),
delta(1., lrqr[6]),
])
# Wampler 5
y = [75901, -204794, 204863, -204436, 253665, -200894, 214131, -185192, 221249, -138370, 315911, -27644, 455253, 197434, 783995, 608816, 1370781, 1303798, 2205519, 2408860, 3444321]
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
df = DataFrame(y = y, x = x)
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5)
lrk = regress(f, df, req_stats=[:default, :cond ])
lr = GLM.lm(f, df)
X, y, n, p = LinearRegressionKit.design_matrix!(f, df)
lrqr = (X \ y)
wampler5_cond = lrk.cond
wampler5_mce_lrk = mean([delta(1., lrk.coefs[1]),
delta(1., lrk.coefs[2]),
delta(1., lrk.coefs[3]),
delta(1., lrk.coefs[4]),
delta(1., lrk.coefs[5]),
delta(1., lrk.coefs[6]),
])
wampler5_mce_glm = mean([delta(1., coef(lr)[1]),
delta(1., coef(lr)[2]),
delta(1., coef(lr)[3]),
delta(1., coef(lr)[4]),
delta(1., coef(lr)[5]),
delta(1., coef(lr)[6]),
])
wampler5_mce_lrqr = mean([delta(1., lrqr[1]),
delta(1., lrqr[2]),
delta(1., lrqr[3]),
delta(1., lrqr[4]),
delta(1., lrqr[5]),
delta(1., lrqr[6]),
])
# put results together
lrk_mces = [norris_mce_lrk, pontius_mce_lrk, noint1_mce_lrk, noint2_mce_lrk, filip_mce_lrk, longley_mce_lrk,
wampler1_mce_lrk, wampler2_mce_lrk, wampler3_mce_lrk, wampler4_mce_lrk, wampler5_mce_lrk]
glm_mces = [norris_mce_glm, pontius_mce_glm, noint1_mce_glm, noint2_mce_glm, filip_mce_glm, longley_mce_glm,
wampler1_mce_glm, wampler2_mce_glm, wampler3_mce_glm, wampler4_mce_glm, wampler5_mce_glm]
lrqr_mces = [norris_mce_lrqr, pontius_mce_lrqr, noint1_mce_lrqr, noint2_mce_lrqr, filip_mce_lrqr, longley_mce_lrqr,
wampler1_mce_lrqr, wampler2_mce_lrqr, wampler3_mce_lrqr, wampler4_mce_lrqr, wampler5_mce_lrqr]
# quick and dirty replacement of the exact-zero entry (noint2_mce_lrqr) with eps(Float32) so that the log of the vector can be taken
lrqr_mces = [norris_mce_lrqr, pontius_mce_lrqr, noint1_mce_lrqr, eps(Float32), filip_mce_lrqr, longley_mce_lrqr,
wampler1_mce_lrqr, wampler2_mce_lrqr, wampler3_mce_lrqr, wampler4_mce_lrqr, wampler5_mce_lrqr]
conds = [norris_cond, pontius_cond, noint1_cond, noint2_cond, filip_cond, longley_cond,
wampler1_cond, wampler2_cond, wampler3_cond, wampler4_cond, wampler5_cond]
results_df = DataFrame( lrk_mces = lrk_mces, glm_mces = glm_mces, lrqr_mces = lrqr_mces, conds=conds)
# Row │ lrk_mces     glm_mces     lrqr_mces    conds
#     │ Float64      Float64      Float64      Float64
# ────┼────────────────────────────────────────────────────
#   1 │ 8.1074e-14   5.65659e-14  1.33227e-14  1362.31
#   2 │ 1.02219e-15  0.000224522  1.49222e-16  2.16738e16
#   3 │ 3.9968e-15   3.9968e-15   3.9968e-15   101.499
#   4 │ 3.33067e-16  2.22045e-16  1.19209e-7   25.7108
#   5 │ 708.127      738.694      738.754      2.05099e15
#   6 │ 1.95531e-5   497730.0     2.04614e-7   8.07878e9
#   7 │ 5.77698e-8   6.59178e-8   2.07236e-10  3.8327e16
#   8 │ 1.36005e-12  7.42606e-13  4.25137e-16  1.20013e21
#   9 │ 5.77698e-8   6.59178e-8   6.95803e-11  9.30321e6
#  10 │ 5.77698e-8   6.59178e-8   8.65831e-10  9.3434e6
#  11 │ 5.77698e-8   6.59178e-8   8.65831e-10  9.3434e6
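# A rough rule of thumb: a QR-based least-squares solve loses on the order of
# eps(Float64) * cond(X) in the coefficients, while methods that form the
# cross-products matrix X'X (Cholesky, sweep) effectively square the condition
# number. That motivates comparing error and condition number on log-log scales.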
s_results_df = stack(results_df, [:lrk_mces, :glm_mces, :lrqr_mces ])
@transform!(s_results_df, :variable = categorical(:variable))
# scatter plot
p = s_results_df |> @vlplot(
title = "Condition number vs mean error of the coefs",
width = 400, height = 400,
:point,
x= {:conds, scale={type= :log}},
y= {:value, scale={type= :log}},
color= :variable,
)
p |> save("figure.png")
# regress log(mean coefficient error) on log(condition number), per estimator and pooled
f = @formula(log(lrk_mces) ~ log(conds))
lrk = regress(f, results_df, req_stats=[:default, :cond ])
f = @formula(log(glm_mces) ~ log(conds))
lrk = regress(f, results_df, req_stats=[:default, :cond ])
f = @formula(log(lrqr_mces) ~ log(conds))
lrk = regress(f, results_df, req_stats=[:default, :cond ])
f = @formula(log(value) ~ log(conds) + variable)
lrk = regress(f, s_results_df, req_stats=[:default, :cond ],
contrasts= Dict(:variable => DummyCoding(base="lrk_mces")))
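# With DummyCoding(base="lrk_mces"), the `variable` coefficients in the pooled
# model measure how far (in log error) glm_mces and lrqr_mces sit from the
# lrk_mces baseline at a given condition number.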