Skip to content

Instantly share code, notes, and snippets.

@antimon2
Last active August 29, 2015 14:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save antimon2/e5ac55cd69a24d21331b to your computer and use it in GitHub Desktop.
Save antimon2/e5ac55cd69a24d21331b to your computer and use it in GitHub Desktop.
線形回帰の Normal Equation(正規方程式)について ref: http://qiita.com/antimon2/items/ac1ebaed75ad58406b94
import numpy as np
X = np.array([
[1, 1, 2, 3],
[1, 2, 3, 5],
[1, 3, 5, 8],
[1, 5, 8, 13],
[1, 8, 13, 21],
[1, 13, 21, 34]])
y = np.array([[8], [13], [21], [34], [55], [89]])
# check rank of matrix
np.linalg.matrix_rank(X)
# => 3
# calcuration
# [1]
np.linalg.pinv(X.T.dot(X)).dot(X.T.dot(y))
# array([[ 2.27373675e-13],
# [ 3.33333333e-01],
# [ 1.33333333e+00],
# [ 1.66666667e+00]])
# [2]
np.linalg.pinv(X).dot(y)
# array([[ 3.55271368e-15],
# [ 3.33333333e-01],
# [ 1.33333333e+00],
# [ 1.66666667e+00]])
# [4]
np.linalg.solve(X.T.dot(X), X.T.dot(y))
# array([[ -8.12048841e-14],
# [ -5.00000000e+00],
# [ -4.00000000e+00],
# [ 7.00000000e+00]])
# Square Matrix
X = X[0:4]
# array([[ 1, 1, 2, 3],
# [ 1, 2, 3, 5],
# [ 1, 3, 5, 8],
# [ 1, 5, 8, 13]])
y = y[0:4]
# array([[ 8],
# [13],
# [21],
# [34]])
# calcuration
# [1]
np.linalg.pinv(X.T.dot(X)).dot(X.T.dot(y))
# array([[ -1.13686838e-13],
# [ 3.33333333e-01],
# [ 1.33333333e+00],
# [ 1.66666667e+00]])
# [2]
np.linalg.pinv(X).dot(y)
# array([[ 4.44089210e-15],
# [ 3.33333333e-01],
# [ 1.33333333e+00],
# [ 1.66666667e+00]])
# [4]
np.linalg.solve(X.T.dot(X), X.T.dot(y))
# array([[ -1.47008842e-14],
# [ 1.00000000e+00],
# [ 2.00000000e+00],
# [ 1.00000000e+00]])
X = [1 1 2 3;
1 2 3 5;
1 3 5 8;
1 5 8 13;
1 8 13 21;
1 13 21 34]
y = [8; 13; 21; 34; 55; 89]
# check rank of matrix
rank(X)
# => 3
# calcuration
# [1]
pinv(X'*X) * X'*y
# ans =
#
# 3.1974e-13
# 3.3333e-01
# 1.3333e+00
# 1.6667e+00
# [2]
pinv(X) * y
# ans =
#
# 0.00000
# 0.33333
# 1.33333
# 1.66667
# [3]
X \ y
# ans =
#
# -1.3628e-14
# 3.3333e-01
# 1.3333e+00
# 1.6667e+00
# [4]
(X'*X) \ (X'*y)
# warning: matrix singular to machine precision, rcond = 4.97057e-18
# ans =
#
# -1.8859e-13
# 3.3333e-01
# 1.3333e+00
# 1.6667e+00
# Square Matrix
X = X(1:4, 1:4)
# X =
#
# 1 1 2 3
# 1 2 3 5
# 1 3 5 8
# 1 5 8 13
y = y(1:4)
# y =
#
# 8
# 13
# 21
# 34
# calcuration
# [1]
pinv(X'*X) * X'*y
# ans =
#
# 1.8119e-13
# 3.3333e-01
# 1.3333e+00
# 1.6667e+00
# [2]
pinv(X) * y
# ans =
#
# -7.1054e-15
# 3.3333e-01
# 1.3333e+00
# 1.6667e+00
# [3]
X \ y
# warning: matrix singular to machine precision, rcond = 0
# ans =
#
# -7.3807e-15
# 3.3333e-01
# 1.3333e+00
# 1.6667e+00
# [4]
(X'*X) \ (X'*y)
# warning: matrix singular to machine precision, rcond = 1.26207e-17
# ans =
#
# 1.5742e-14
# 3.3333e-01
# 1.3333e+00
# 1.6667e+00
X = rand(10, 4)
# 10x4 Array{Float64,2}:
# 0.71148 0.968352 0.0952939 0.796324
# 0.915128 0.128326 0.630086 0.0635579
# 0.351199 0.131409 0.934867 0.501701
# 0.165645 0.874088 0.173725 0.976326
# 0.765261 0.790716 0.760362 0.204496
# 0.544099 0.156464 0.041718 0.507071
# 0.764964 0.852837 0.230312 0.134783
# 0.0738597 0.75529 0.693856 0.0107293
# 0.621861 0.56881 0.66972 0.163911
# 0.9471 0.453841 0.466836 0.10744
y = rand(10, 1)
# 10x1 Array{Float64,2}:
# 0.389321
# 0.436261
# 0.308189
# 0.734617
# 0.410237
# 0.4969
# 0.0708882
# 0.0840005
# 0.944711
# 0.14718
# calcuration
# [1]
pinv(X' * X) * X' * y
# 4x1 Array{Float64,2}:
# 0.169937
# -0.0365938
# 0.273122
# 0.55004
# [2]
pinv(X) * y
# 4x1 Array{Float64,2}:
# 0.169937
# -0.0365938
# 0.273122
# 0.55004
# [3]
X \ y
# 4x1 Array{Float64,2}:
# 0.169937
# -0.0365938
# 0.273122
# 0.55004
# [4]
(X'*X) \ (X'*y)
# 4x1 Array{Float64,2}:
# 0.169937
# -0.0365938
# 0.273122
# 0.55004
# time measurement (n = 10)
# [1]
@time for k=1:10000
X = rand(40, 10)
y = rand(40, 1)
pinv(X' * X) * X' * y
end
# elapsed time: 1.087745051 seconds (283600016 bytes allocated, 17.28% gc time)
# [2]
@time for k=1:10000
X = rand(40, 10)
y = rand(40, 1)
pinv(X) * y
end
# elapsed time: 1.278193773 seconds (334800016 bytes allocated, 17.29% gc time)
# [3]
@time for k=1:10000
X = rand(40, 10)
y = rand(40, 1)
X \ y
end
# elapsed time: 1.014968744 seconds (324320000 bytes allocated, 20.29% gc time)
# [4]
@time for k=1:10000
X = rand(100, 30)
y = rand(100, 1)
(X'*X) \ (X'*y)
end
# elapsed time: 0.163586767 seconds (62720032 bytes allocated, 41.51% gc time)
# time measurement (n = 30)
# [1]
@time for k=1:10000
X = rand(100, 30)
y = rand(100, 1)
pinv(X' * X) * X' * y
end
# elapsed time: 5.820615493 seconds (1557840000 bytes allocated, 19.02% gc time)
# [2]
@time for k=1:10000
X = rand(100, 30)
y = rand(100, 1)
pinv(X) * y
end
# elapsed time: 7.518744844 seconds (1914480016 bytes allocated, 16.51% gc time)
# [3]
@time for k=1:10000
X = rand(100, 30)
y = rand(100, 1)
X \ y
end
# elapsed time: 3.455976006 seconds (1292000000 bytes allocated, 22.67% gc time)
# [4]
@time for k=1:10000
X = rand(100, 30)
y = rand(100, 1)
(X'*X) \ (X'*y)
end
# elapsed time: 0.777771618 seconds (407840016 bytes allocated, 32.71% gc time)
% solve_X_y.m
X = rand(10, 4)
# X =
#
# 0.033536 0.816107 0.996677 0.958327
# 0.683542 0.116498 0.614316 0.884338
# 0.734337 0.769245 0.696212 0.245270
# 0.216938 0.013297 0.885327 0.906086
# 0.630620 0.733668 0.820551 0.784664
# 0.138834 0.838178 0.216751 0.638286
# 0.100739 0.893597 0.891867 0.239482
# 0.362333 0.404999 0.018274 0.922847
# 0.102606 0.442110 0.744582 0.452299
# 0.590709 0.274452 0.459526 0.656588
y = rand(10, 1)
# y =
#
# 0.48518
# 0.13242
# 0.60525
# 0.31265
# 0.59250
# 0.47161
# 0.95971
# 0.44011
# 0.60115
# 0.75571
# calcuration
# [1]
pinv(X' * X) * X' * y
# ans =
#
# 0.1861915
# 0.5484641
# 0.2473279
# -0.0031611
# [2]
pinv(X) * y
# ans =
#
# 0.1861915
# 0.5484641
# 0.2473279
# -0.0031611
# [3]
X \ y
# ans =
#
# 0.1861915
# 0.5484641
# 0.2473279
# -0.0031611
# [4]
(X'*X) \ (X'*y)
# ans =
#
# 0.1861915
# 0.5484641
# 0.2473279
# -0.0031611
# time measurement (n = 10)
# [1]
tic();
for k=1:10000;
X = rand(40, 10);
y = rand(40, 1);
pinv(X' * X) * X' * y;
end;
toc()
# Elapsed time is 1.26513 seconds.
# [2]
tic();
for k=1:10000;
X = rand(40, 10);
y = rand(40, 1);
pinv(X) * y;
end;
toc()
# Elapsed time is 1.16283 seconds.
# [3]
tic();
for k=1:10000;
X = rand(40, 10);
y = rand(40, 1);
X \ y;
end;
toc()
# Elapsed time is 0.902037 seconds.
# [4]
tic();
for k=1:10000;
X = rand(40, 10);
y = rand(40, 1);
(X'*X) \ (X'*y);
end;
toc()
# Elapsed time is 0.689348 seconds.
# time measurement (n = 30)
# [1]
tic();
for k=1:10000;
X = rand(100, 30);
y = rand(100, 1);
pinv(X' * X) * X' * y;
end;
toc()
# Elapsed time is 5.79588 seconds.
# [2]
tic();
for k=1:10000;
X = rand(100, 30);
y = rand(100, 1);
pinv(X) * y;
end;
toc()
# Elapsed time is 7.11547 seconds.
# [3]
tic();
for k=1:10000;
X = rand(100, 30);
y = rand(100, 1);
X \ y;
end;
toc()
# Elapsed time is 3.64188 seconds.
# [4]
tic();
for k=1:10000;
X = rand(100, 30);
y = rand(100, 1);
(X'*X) \ (X'*y);
end;
toc()
# Elapsed time is 1.37039 seconds.
import numpy as np
X = np.random.rand(10, 4)
# array([[ 0.61009055, 0.71722947, 0.48465025, 0.15660522],
# [ 0.02424431, 0.49947237, 0.60493258, 0.8988653 ],
# [ 0.65048106, 0.69667863, 0.52860957, 0.65003537],
# [ 0.56541266, 0.25463788, 0.74047536, 0.64691215],
# [ 0.03052439, 0.47651739, 0.01667898, 0.7613639 ],
# [ 0.87725831, 0.47684888, 0.44039111, 0.39706053],
# [ 0.58302851, 0.20919564, 0.97598994, 0.19268083],
# [ 0.35987338, 0.98331404, 0.06299533, 0.76193058],
# [ 0.625453 , 0.70985323, 0.62948802, 0.627458 ],
# [ 0.64201569, 0.22264827, 0.71333221, 0.53305839]])
y = np.random.rand(10, 1)
# array([[ 0.99674247],
# [ 0.66282312],
# [ 0.68295932],
# [ 0.14330449],
# [ 0.17467666],
# [ 0.90896029],
# [ 0.65385071],
# [ 0.00748736],
# [ 0.93824979],
# [ 0.91696375]])
# calcuration
# [1]
np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)
# array([[ 0.32591078],
# [ 0.46479763],
# [ 0.6684976 ],
# [-0.26695783]])
# [2]
np.linalg.pinv(X).dot(y)
# array([[ 0.32591078],
# [ 0.46479763],
# [ 0.6684976 ],
# [-0.26695783]])
# x[3]
# np.linalg.solve(X, y)
# @> LinAlgError
# [4]
np.linalg.solve(X.T.dot(X), X.T.dot(y))
# array([[ 0.32591078],
# [ 0.46479763],
# [ 0.6684976 ],
# [-0.26695783]])
# time measurement (n = 10)
from timeit import timeit
# [1]
def test_a():
X = np.random.rand(40, 10)
y = np.random.rand(40, 1)
np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)
timeit("test_a()", setup="from __main__ import test_a", number=10000)
# 1.1948060989379883
# [2]
def test_b():
X = np.random.rand(40, 10)
y = np.random.rand(40, 1)
np.linalg.pinv(X).dot(y)
timeit("test_b()", setup="from __main__ import test_b", number=10000)
# 1.2698009014129639
# [4]
def test_c():
X = np.random.rand(40, 10)
y = np.random.rand(40, 1)
np.linalg.solve(X.T.dot(X), X.T.dot(y))
timeit("test_c()", setup="from __main__ import test_c", number=10000)
# 0.4645709991455078
# time measurement (n = 30)
# [1]
def test_d():
X = np.random.rand(100, 30)
y = np.random.rand(100, 1)
np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)
timeit("test_d()", setup="from __main__ import test_d", number=10000)
# 4.615994930267334
# [2]
def test_e():
X = np.random.rand(100, 30)
y = np.random.rand(100, 1)
np.linalg.pinv(X).dot(y)
timeit("test_e()", setup="from __main__ import test_e", number=10000)
# 5.413921117782593
# [4]
def test_f():
X = np.random.rand(100, 30)
y = np.random.rand(100, 1)
np.linalg.solve(X.T.dot(X), X.T.dot(y))
timeit("test_f()", setup="from __main__ import test_f", number=10000)
# 0.9642360210418701
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment