Coursera Machine Learning in Python - [Week 2] Simple and Multiple Regression: (1) Simple Regression
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

# Simple regression analysis #
# Data: city population (in 10,000s) vs. profit (in $10,000s)
X = np.array([6.1101,5.5277,8.5186,7.0032,5.8598,8.3829,7.4764,8.5781,6.4862,5.0546,5.7107,14.164,5.734,8.4084,5.6407,5.3794,6.3654,5.1301,6.4296,7.0708,6.1891,20.27,5.4901,6.3261,5.5649,18.945,12.828,10.957,13.176,22.203,5.2524,6.5894,9.2482,5.8918,8.2111,7.9334,8.0959,5.6063,12.836,6.3534,5.4069,6.8825,11.708,5.7737,7.8247,7.0931,5.0702,5.8014,11.7,5.5416,7.5402,5.3077,7.4239,7.6031,6.3328,6.3589,6.2742,5.6397,9.3102,9.4536,8.8254,5.1793,21.279,14.908,18.959,7.2182,8.2951,10.236,5.4994,20.341,10.136,7.3345,6.0062,7.2259,5.0269,6.5479,7.5386,5.0365,10.274,5.1077,5.7292,5.1884,6.3557,9.7687,6.5159,8.5172,9.1802,6.002,5.5204,5.0594,5.7077,7.6366,5.8707,5.3054,8.2934,13.394,5.4369])
y = np.array([17.592,9.1302,13.662,11.854,6.8233,11.886,4.3483,12,6.5987,3.8166,3.2522,15.505,3.1551,7.2258,0.71618,3.5129,5.3048,0.56077,3.6518,5.3893,3.1386,21.767,4.263,5.1875,3.0825,22.638,13.501,7.0467,14.692,24.147,-1.22,5.9966,12.134,1.8495,6.5426,4.5623,4.1164,3.3928,10.117,5.4974,0.55657,3.9115,5.3854,2.4406,6.7318,1.0463,5.1337,1.844,8.0043,1.0179,6.7504,1.8396,4.2885,4.9981,1.4233,-1.4211,2.4756,4.6042,3.9624,5.4141,5.1694,-0.74279,17.929,12.054,17.054,4.8852,5.7442,7.7754,1.0173,20.992,6.6799,4.0259,1.2784,3.3411,-2.6807,0.29678,3.8845,5.7014,6.7526,2.0576,0.47953,0.20421,0.67861,7.5435,5.3436,4.2415,6.7981,0.92695,0.152,2.8214,1.8451,4.2959,7.2029,1.9869,0.14454,9.0551,0.61705])
# Extra 1: scikit-learn's built-in least squares
from sklearn.linear_model import LinearRegression
print("LinearRegression on sklearn.linear_model ...")
regr = LinearRegression()  # the former normalize= option (mean/L2-norm scaling) was removed in scikit-learn 1.2
regr.fit(X.reshape(-1, 1), y)  # reshaping matters when there is only one feature
print("Intercept:", regr.intercept_)  # intercept (constant term)
print("Coefficients: ", regr.coef_)  # coefficient of x
predicts = regr.predict(np.array([3.5, 7]).reshape(-1, 1))
print("For population = [35k, 70k], we predict a profit of \n", predicts * 10000)
print()
# Extra 2: regression with scikit-learn's built-in stochastic gradient descent
from sklearn.linear_model import SGDRegressor
print("SGDRegressor on sklearn.linear_model ...")
# shuffle=False keeps the sample order fixed, which should make this behave
# like plain gradient descent; max_iter requires scikit-learn 0.19 or later
regr = SGDRegressor(eta0=0.01, learning_rate="constant", max_iter=1500, shuffle=False)
regr.fit(X.reshape(-1, 1), y)
print("Intercept:", regr.intercept_)
print("Coefficients: ", regr.coef_)
predicts = regr.predict(np.array([3.5, 7]).reshape(-1, 1))
print("For population = [35k, 70k], we predict a profit of \n", predicts * 10000)
print()
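# SGD is sensitive to feature scale, so standardizing the input often makes the
# fit more stable. A minimal sketch, assuming a StandardScaler pipeline with
# default SGD settings (not part of the original exercise):
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
scaled_regr = make_pipeline(StandardScaler(), SGDRegressor(max_iter=1500))
scaled_regr.fit(X.reshape(-1, 1), y)
print("Scaled SGD predictions:", scaled_regr.predict(np.array([3.5, 7]).reshape(-1, 1)) * 10000)
print()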
# Plot the raw data
plt.plot(X, y, "o", color="r")
plt.show()
# Cost function: J(theta) = 1/(2m) * sum((X @ theta - y)^2)
def compute_cost(X, y, theta):
    return np.sum((np.dot(X, theta) - y) ** 2) / (2 * len(y))
print("Testing the cost function ...") | |
X = np.array((np.ones(len(y)), X)).T | |
theta = np.zeros(2) | |
J = compute_cost(X, y, theta) | |
print("With theta = [0, 0]\nCost computed =", J) | |
J = compute_cost(X, y, np.array([-1,2])) | |
print("With theta = [-1, 2]\nCost computed =", J) | |
print() | |
# Batch gradient descent
def gradient_descent(X, y, theta, alpha, num_iters):
    J_history = []
    theta_out = theta.copy()  # copy so the caller's theta is not mutated in place
    for i in range(num_iters):
        theta_out -= alpha / len(y) * np.dot(X.T, np.dot(X, theta_out) - y)
        J_history.append(compute_cost(X, y, theta_out))
    return theta_out, J_history
print("Running Gradient Descent ...") | |
iterations = 1500; | |
alpha = 0.01; | |
theta, J_history = gradient_descent(X, y, theta, alpha, iterations) | |
print("Theta found by gradient descent:", theta) | |
# 近似直線のプロット | |
plt.plot(X[:,1], y, "o", label="Training data", color="r") | |
plt.plot(X[:,1], np.dot(X, theta), label="Linear regression", color="b") | |
plt.legend() | |
plt.show() | |
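# The recorded cost history doubles as a convergence check: with a suitable
# learning rate, J should decrease monotonically. A small optional plot:
plt.plot(J_history)
plt.xlabel("Iteration")
plt.ylabel("Cost J")
plt.show()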
# Predictions for populations of 35,000 and 70,000
predict1 = np.dot(np.array([1, 3.5]), theta)
print("For population = 35,000, we predict a profit of ", predict1 * 10000)
predict2 = np.dot(np.array([1, 7]), theta)
print("For population = 70,000, we predict a profit of ", predict2 * 10000)
# Visualizing J over a grid of theta values
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-1, 4, 100)
theta0, theta1 = np.meshgrid(theta0_vals, theta1_vals)
J_vals = np.zeros((len(theta0_vals), len(theta1_vals)))
for i in range(len(theta0_vals)):
    for j in range(len(theta1_vals)):
        t = np.array([theta0_vals[i], theta1_vals[j]])
        J_vals[i, j] = compute_cost(X, y, t)
# Surface plot (J_vals is indexed [theta0, theta1], while meshgrid's default
# 'xy' indexing puts theta0 along columns, so transpose J_vals to match)
fig = plt.figure()
ax = fig.add_subplot(projection="3d")  # Axes3D(fig) no longer attaches axes in recent matplotlib
ax.plot_surface(theta0, theta1, J_vals.T, rstride=1, cstride=1, cmap=cm.coolwarm)
plt.show()
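# A contour plot of the same cost surface, with the learned theta marked, makes
# the minimum easier to see. A sketch; the log-spaced levels are an assumption,
# mirroring the original Octave exercise:
plt.contour(theta0, theta1, J_vals.T, levels=np.logspace(-2, 3, 20))
plt.plot(theta[0], theta[1], "rx")
plt.xlabel("theta0")
plt.ylabel("theta1")
plt.show()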
print()