Skip to content

Instantly share code, notes, and snippets.

@ericqu
ericqu / measure_coeff_error.jl
Created November 22, 2022 00:34
Quick analysis of the error in the estimated coefficients of a linear regression, contrasted with the condition number
using LinearRegressionKit, StatsModels, DataFrames, CSV
using GLM, Statistics
using CategoricalArrays, DataFramesMeta
using VegaLite
# versioninfo()
# import Pkg
# Pkg.status()
# Julia Version 1.8.3
using LinearRegressionKit, StatsModels, DataFrames, CSV
y = [0.8116, 0.9072, 0.9052, 0.9039, 0.8053, 0.8377, 0.8667, 0.8809, 0.7975, 0.8162, 0.8515, 0.8766, 0.8885, 0.8859, 0.8959, 0.8913, 0.8959, 0.8971, 0.9021, 0.909, 0.9139, 0.9199, 0.8692, 0.8872, 0.89, 0.891, 0.8977, 0.9035, 0.9078, 0.7675, 0.7705, 0.7713, 0.7736, 0.7775, 0.7841, 0.7971, 0.8329, 0.8641, 0.8804, 0.7668, 0.7633, 0.7678, 0.7697, 0.77, 0.7749, 0.7796, 0.7897, 0.8131, 0.8498, 0.8741, 0.8061, 0.846, 0.8751, 0.8856, 0.8919, 0.8934, 0.894, 0.8957, 0.9047, 0.9129, 0.9209, 0.9219, 0.7739, 0.7681, 0.7665, 0.7703, 0.7702, 0.7761, 0.7809, 0.7961, 0.8253, 0.8602, 0.8809, 0.8301, 0.8664, 0.8834, 0.8898, 0.8964, 0.8963, 0.9074, 0.9119, 0.9228]
x = [-6.860120914, -4.324130045, -4.358625055, -4.358426747, -6.955852379, -6.661145254, -6.355462942, -6.118102026, -7.115148017, -6.815308569, -6.519993057, -6.204119983, -5.853871964, -6.109523091, -5.79832982, -5.482672118, -5.171791386, -4.851705903, -4.517126416, -4.143573228, -3.709075441, -3.499489089, -6
using LinearRegressionKit, StatsModels, DataFrames, CSV
# Fit a degree-5 polynomial in `x` to `y` (21 observations, x = 0, 1, ..., 20).
y = [
    75901, -204794, 204863, -204436, 253665, -200894, 214131,
    -185192, 221249, -138370, 315911, -27644, 455253, 197434,
    783995, 608816, 1370781, 1303798, 2205519, 2408860, 3444321,
]
x = collect(0:20)

# Build the table directly from the in-line vectors so the script is
# self-contained: previously `y` and `x` were defined but never used, and the
# data was read from an external "wangler5.csv" that must exist in the working
# directory.
# NOTE(review): assumes the CSV held the same observations as the vectors above — confirm.
df = DataFrame(y = y, x = x)

# Response `y` against the powers x, x^2, ..., x^5.
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5)
lrk = regress(f, df; req_stats=["default"])

# Trailing bare expression: the fit is the value of the script's last statement.
lrk
using LinearRegressionKit, StatsModels, DataFrames, CSV
# Degree-5 polynomial regression of `y` on `x` for 21 observations.
# Predictor: the integers 0 through 20.
x = collect(0:20)
# Response: one value per predictor entry, in the same order.
y = [
    75901, -204794, 204863, -204436, 253665, -200894, 214131,
    -185192, 221249, -138370, 315911, -27644, 455253, 197434,
    783995, 608816, 1370781, 1303798, 2205519, 2408860, 3444321,
]

# Two-column table; column order is y, then x.
df = DataFrame(:y => y, :x => x)

# Model terms: x, x^2, x^3, x^4, x^5.
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5)
lrk = regress(f, df; req_stats=["default"])

# The fit is the value of the final statement.
lrk
using LinearRegressionKit, StatsModels, DataFrames, CSV
# Degree-5 polynomial regression of `y` on `x` for 21 observations.
# Predictor: the integers 0 through 20.
x = collect(0:20)
# Response: floating-point values, one per predictor entry.
y = [
    760.0, -2042.0, 2111.0, -1684.0, 3888.0, 1858.0, 11379.0,
    17560.0, 39287.0, 64382.0, 113159.0, 175108.0, 273291.0, 400186.0,
    581243.0, 811568.0, 1121004.0, 1506550.0, 2002767.0, 2611612.0, 3369180.0,
]

# Two-column table; column order is y, then x.
df = DataFrame(:y => y, :x => x)

# Model terms: x, x^2, x^3, x^4, x^5.
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5)
lrk = regress(f, df; req_stats=["default"])

# The fit is the value of the final statement.
lrk
using LinearRegressionKit, StatsModels, DataFrames, CSV
# Degree-5 polynomial regression of `y` on `x` for 21 observations.
# Predictor: the integers 0 through 20.
x = collect(0:20)
# Response: floating-point values, one per predictor entry.
y = [
    1.0, 1.11111, 1.24992, 1.42753, 1.65984, 1.96875, 2.38336,
    2.94117, 3.68928, 4.68559, 6.0, 7.71561, 9.92992, 12.75603,
    16.32384, 20.78125, 26.29536, 33.05367, 41.26528, 51.16209, 63.0,
]

# Two-column table; column order is y, then x.
df = DataFrame(:y => y, :x => x)

# Model terms: x, x^2, x^3, x^4, x^5.
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5)
lrk = regress(f, df; req_stats=["default"])

# The fit is the value of the final statement.
lrk
@ericqu
ericqu / test_wrangler1.jl
Created June 30, 2022 19:44
wrangler1.jl
using LinearRegressionKit, StatsModels, DataFrames, CSV
# Degree-5 polynomial regression of `y` on `x` for 21 observations.
# Predictor: the integers 0 through 20.
x = collect(0:20)
# Response: integer values, one per predictor entry.
y = [
    1, 6, 63, 364, 1365, 3906, 9331,
    19608, 37449, 66430, 111111, 177156, 271453, 402234,
    579195, 813616, 1118481, 1508598, 2000719, 2613660, 3368421,
]

# Two-column table; column order is y, then x.
df = DataFrame(:y => y, :x => x)

# Model terms: x, x^2, x^3, x^4, x^5.
f = @formula(y ~ x + x^2 + x^3 + x^4 + x^5)
lrk = regress(f, df; req_stats=["default"])

# The fit is the value of the final statement.
lrk
using LinearRegressionKit, StatsModels, DataFrames, CSV
# Regression of `y` on `x` with the intercept suppressed (`0 +` in the formula),
# using three observations.
x = collect(4:6)
y = [3, 4, 4]

# Two-column table; column order is x, then y.
df = DataFrame(:x => x, :y => y)

f = @formula(y ~ 0 + x)
# NOTE(review): `lm` is presumably also a function name exported by GLM; this
# binding may clash if GLM is loaded in the same session — confirm.
lm = regress(f, df; req_stats=["default"])

# The fit is the value of the final statement.
lm
using LinearRegressionKit, StatsModels, DataFrames, CSV
# Regression of `y` on `x` with the intercept suppressed (`0 +` in the formula),
# using eleven observations: x = 60..70 paired with y = 130..140.
x = collect(60:70)
y = collect(130:140)

# Two-column table; column order is x, then y.
df = DataFrame(:x => x, :y => y)

f = @formula(y ~ 0 + x)
# NOTE(review): `lm` is presumably also a function name exported by GLM; this
# binding may clash if GLM is loaded in the same session — confirm.
lm = regress(f, df; req_stats=["default"])
using LinearRegressionKit, StatsModels, DataFrames, CSV
# Quadratic regression of `y` on `x` for 40 observations.
# Predictor: 150000 to 3000000 in steps of 150000 (20 levels), listed twice.
x = repeat(150_000:150_000:3_000_000, 2)
# Response: first 20 values pair with the first pass over the levels,
# the last 20 with the second pass.
y = [
    0.11019, 0.21956, 0.32949, 0.43899, 0.54803,
    0.65694, 0.76562, 0.87487, 0.98292, 1.09146,
    1.20001, 1.30822, 1.41599, 1.52399, 1.63194,
    1.73947, 1.84646, 1.95392, 2.06128, 2.16844,
    0.11052, 0.22018, 0.32939, 0.43886, 0.54798,
    0.65739, 0.76596, 0.87474, 0.983, 1.0915,
    1.20004, 1.30818, 1.41613, 1.52408, 1.63159,
    1.73965, 1.84696, 1.95445, 2.06177, 2.16829,
]

# Two-column table; column order is x, then y.
df = DataFrame(:x => x, :y => y)

# Model terms: x and x^2.
f = @formula(y ~ x + x^2)
lrk = regress(f, df; req_stats=["default"])

# The fit is the value of the final statement.
lrk