Coursera Machine Learning in Python - [Week 5] Neural Networks (2), Part 1: implementing it myself
import numpy as np
import copy
import matplotlib.pyplot as plt
from scipy.io import loadmat

## Load the data
def load_data1():
    data = loadmat("ex4data1")
    return np.array(data['X']), np.ravel(np.array(data['y']))
X_data, y = load_data1()
m = X_data.shape[0]

# Load pre-trained values for Theta1 and Theta2
def load_weights():
    data = loadmat("ex4weights")
    return np.array(data['Theta1']), np.array(data['Theta2'])

Theta1, Theta2 = load_weights()

# Define constants
INPUT_LAYER_SIZE = 400
HIDDEN_LAYER_SIZE = 25
NUM_LABELS = 10
## Function definitions
# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Derivative of the sigmoid function
def sigmoid_gradient(z):
    g = sigmoid(z)
    return g * (1 - g)
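# (Added sanity check, not in the original script: since g'(z) = g(z) * (1 - g(z)),
# the derivative peaks at z = 0 with g'(0) = 0.25.)
assert abs(sigmoid_gradient(0.0) - 0.25) < 1e-12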
# Cost function
def nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_):
    # The parameters are passed as a tuple rather than unrolled into one vector
    Theta1 = nn_params[0]
    Theta2 = nn_params[1]
    m = X.shape[0]
    # y holds labels 1-10, so convert it to boolean indicators, one column per classifier
    Y = np.zeros((m, num_labels))
    for i in range(num_labels):
        Y[:, i] = y == (i + 1)
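    # (Added note: an equivalent vectorized form would be Y = np.eye(num_labels)[y - 1],
    # assuming labels run 1..num_labels; the loop above is what the original uses.)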
    ## Forward propagation
    # Add the bias unit to the input layer
    A1 = np.c_[np.ones((m, 1)), X]
    # Compute the activations of layer 2
    Z2 = np.dot(A1, Theta1.T)
    A2 = sigmoid(Z2)
    # Add the bias unit to layer 2
    A2 = np.c_[np.ones((A2.shape[0], 1)), A2]
    # Hypothesis (predicted values)
    h_theta = sigmoid(np.dot(A2, Theta2.T))
    # Compute the cost
    J = np.sum(np.sum(-Y * np.log(h_theta) - (1 - Y) * np.log(1 - h_theta))) / m
    J += lambda_ / 2 / m * (np.sum(np.sum(Theta1[:, 1:] ** 2)) + np.sum(np.sum(Theta2[:, 1:] ** 2)))

    ## Backpropagation
    # delta^(3) = predictions - training labels Y
    delta_3 = h_theta - Y
    # Compute delta^(2)
    delta_2 = np.dot(delta_3, Theta2) * np.c_[np.zeros((m, 1)), sigmoid_gradient(Z2)]
    # Remove the bias-unit component from delta^(2)
    delta_2 = delta_2[:, 1:]
    # Accumulate the errors into Delta^(1) and Delta^(2)
    Delta2 = np.dot(delta_3.T, A2)
    Delta1 = np.dot(delta_2.T, A1)
    # Gradient matrices without the regularization term
    Theta2_grad = Delta2 / m
    Theta1_grad = Delta1 / m

    ## Add the regularization term to the gradients
    if lambda_ > 0:
        # NumPy array assignment is by reference, so make a deep copy
        temp = copy.deepcopy(Theta1)
        temp[:, 0] = 0
        Theta1_grad += temp * (lambda_ / m)
        # Do the same for Theta2
        temp = copy.deepcopy(Theta2)
        temp[:, 0] = 0
        Theta2_grad += temp * (lambda_ / m)

    # Return J and (Theta1_grad, Theta2_grad)
    return J, (Theta1_grad, Theta2_grad)
# Set random initial values for the weights
def rand_initialize_weights(L_in, L_out):
    # For a weight matrix Theta, L_in = number of units in the layer it maps from
    # (in forward propagation) and L_out = number of units in the layer it maps to
    epsilon_init = 0.12
    # epsilon_init is given ad hoc here, but a good rule of thumb is said to be
    # epsilon_init = sqrt(6) / sqrt(L_in + L_out).
    # Working backwards from epsilon_init = 0.12 gives L_in + L_out = 416.7, which
    # roughly matches the input layer (401, with bias) feeding the hidden layer (25).
    return np.random.uniform(-epsilon_init, epsilon_init, (L_out, 1 + L_in))
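# (Added sketch: the rule of thumb above as a helper function; the name
# recommended_epsilon is my own and is not part of the original exercise.)
def recommended_epsilon(L_in, L_out):
    # e.g. recommended_epsilon(401, 25) ≈ 0.1187, close to the 0.12 used above
    return np.sqrt(6) / np.sqrt(L_in + L_out)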
## Testing the functions
# Compute the cost with the pre-loaded Theta1 and Theta2, with lambda = 0 (no regularization)
lambda_ = 0
J, grad = nn_cost_function((Theta1, Theta2), INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE, NUM_LABELS, X_data, y, lambda_)
print("Feedforward Using Neural Network ...")
print("Cost at parameters (loaded from ex4weights):", J)
print("(this value should be about 0.287629)\n")

# Regularization with lambda = 1
lambda_ = 1
J, grad = nn_cost_function((Theta1, Theta2), INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE, NUM_LABELS, X_data, y, lambda_)
print("Checking Cost Function (w/ Regularization) ...")
print("Cost at parameters (loaded from ex4weights):", J)
print("(this value should be about 0.383770)\n")
## Testing the sigmoid derivative
g = sigmoid_gradient(np.array([-1, -0.5, 0, 0.5, 1]))
print("Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:")
print(g)
print()
## Gradient checking (debugging backpropagation)
# Initialize weights for debugging
def debug_initialize_weights(fan_out, fan_in):
    W = np.zeros((fan_out, 1 + fan_in))
    W = np.sin(np.arange(1, np.size(W) + 1)).reshape(W.shape, order='F') / 10
    return W
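# (Added note: the sine-based values stand in for random numbers while staying
# deterministic, so gradient checking is reproducible from run to run.)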
# Compute J(theta +/- epsilon) for gradient checking (very slow)
def compute_numerical_gradient(cost_wrap_func, theta):
    Theta1 = theta[0]
    Theta2 = theta[1]
    perturb1 = np.zeros(Theta1.shape)
    perturb2 = np.zeros(Theta2.shape)
    numgrad = (np.zeros(Theta1.shape), np.zeros(Theta2.shape))
    e = 1e-4
    # Two-sided finite difference
    calc_cost = lambda: (cost_wrap_func((Theta1 + perturb1, Theta2 + perturb2)) - cost_wrap_func((Theta1 - perturb1, Theta2 - perturb2))) / 2 / e
    for p in range(np.size(Theta1)):
        index = np.unravel_index(p, Theta1.shape, order="F")
        perturb1[index] = e
        numgrad[0][index] = calc_cost()
        perturb1[index] = 0
    for p in range(np.size(Theta2)):
        index = np.unravel_index(p, Theta2.shape, order="F")
        perturb2[index] = e
        numgrad[1][index] = calc_cost()
        perturb2[index] = 0
    return numgrad
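# (Added note on why this is slow: each parameter needs two full cost
# evaluations, so the debug network below with 5*4 + 3*6 = 38 parameters takes
# 76 forward passes, while the full 25*401 + 10*26 = 10285-parameter network
# would take over 20000. That is why gradient checking uses a tiny network.)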
# Run gradient checking
def check_nn_gradients(lambda_):
    # Debug-sized parameters
    input_layer_size, hidden_layer_size, num_labels, m = 3, 5, 3, 5
    # Give Theta1 and Theta2 pseudo-random initial values
    Theta1 = debug_initialize_weights(hidden_layer_size, input_layer_size)
    Theta2 = debug_initialize_weights(num_labels, hidden_layer_size)
    # Use pseudo-random debug values for X as well
    X = debug_initialize_weights(m, input_layer_size - 1)
    # Generate y arbitrarily with the mod function
    y = 1 + np.mod(np.arange(1, m + 1), num_labels)
    # Compute the cost and gradients with the cost function
    cost, grad = nn_cost_function((Theta1, Theta2), input_layer_size, hidden_layer_size, num_labels, X, y, lambda_)
    cost_wrap = lambda Thetas: nn_cost_function(Thetas, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_)[0]
    numgrad = compute_numerical_gradient(cost_wrap, (Theta1, Theta2))
    # Compare backpropagation against the numerical gradients
    # Unroll the results into vectors
    result_grad = np.append(np.ravel(grad[0], order="F"), np.ravel(grad[1], order="F"))
    result_numgrad = np.append(np.ravel(numgrad[0], order="F"), np.ravel(numgrad[1], order="F"))
    for g, n in zip(result_grad, result_numgrad):
        print(n, g)
    print("The above two columns you get should be very similar.")
    print("(Left-Your Numerical Gradient, Right-Analytical Gradient)\n")
    ## Compute the relative difference
    diff = np.linalg.norm(result_numgrad - result_grad) / np.linalg.norm(result_numgrad + result_grad)
    print("If your backpropagation implementation is correct, then")
    print("the relative difference will be small (less than 1e-9).")
    print("Relative Difference: ", diff)
# Gradient checking with lambda = 0
print("Checking Backpropagation...")
check_nn_gradients(0)
print()

# Gradient checking with lambda = 3
print("Checking Backpropagation (w/ Regularization) ... ")
lambda_ = 3
check_nn_gradients(lambda_)
print()

# Debug the cost function the same way, using the pre-loaded weights
debug_J, debug_grad = nn_cost_function((Theta1, Theta2), INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE, NUM_LABELS, X_data, y, lambda_)
print("Cost at (fixed) debugging parameters (w/ lambda =", lambda_, ") :", debug_J)
print("(for lambda = 3, this value should be about 0.576051)\n")
## Training the neural network
import sys, time

# Gradient descent
def gradient_descent(initial_all_theta, cost_wrap, eta, maxiter=100):
    theta_before = initial_all_theta
    for i in range(maxiter):
        J, grad = cost_wrap(theta_before)
        theta = copy.deepcopy(theta_before)
        for j in range(len(initial_all_theta)):
            theta[j] = theta[j] - eta * grad[j]
        mes = f"iter = {i}, J = {J}"
        theta_before = theta
        # Overwrite the same console line so the output doesn't flood the screen
        sys.stdout.write("\r%s" % mes)
        sys.stdout.flush()
        time.sleep(0.01)
    return theta, J
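# (Added sketch, not part of the original script and not called anywhere below:
# an optimizer such as scipy's L-BFGS usually converges in far fewer iterations
# than plain gradient descent. minimize() wants a single unrolled parameter
# vector, so we flatten and rebuild the (Theta1, Theta2) tuple around
# nn_cost_function. The name train_with_scipy is my own.)
def train_with_scipy(initial_thetas, lambda_, maxiter=100):
    from scipy.optimize import minimize
    shapes = [t.shape for t in initial_thetas]
    split = initial_thetas[0].size

    def roll(vec):
        # Rebuild (Theta1, Theta2) from the unrolled vector
        return vec[:split].reshape(shapes[0]), vec[split:].reshape(shapes[1])

    def cost_and_grad(vec):
        J, grad = nn_cost_function(roll(vec), INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE,
                                   NUM_LABELS, X_data, y, lambda_)
        return J, np.append(np.ravel(grad[0]), np.ravel(grad[1]))

    x0 = np.append(np.ravel(initial_thetas[0]), np.ravel(initial_thetas[1]))
    res = minimize(cost_and_grad, x0, jac=True, method="L-BFGS-B",
                   options={"maxiter": maxiter})
    return roll(res.x), res.fun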
# Initial values for Theta
initial_Theta1 = rand_initialize_weights(INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE)
initial_Theta2 = rand_initialize_weights(HIDDEN_LAYER_SIZE, NUM_LABELS)
# Set the regularization parameter lambda = 1 (try varying this)
lambda_ = 1
# Train the neural network
print("Training Neural Network...")
nn_cost_wrap = lambda Theta: nn_cost_function(Theta, INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE, NUM_LABELS, X_data, y, lambda_)
nn_Thetas, cost = gradient_descent([initial_Theta1, initial_Theta2], nn_cost_wrap, 2, 500)
print()
## Prediction and accuracy
def predict(Theta1, Theta2, X):
    m = X.shape[0]
    h1 = sigmoid(np.dot(np.c_[np.ones((m, 1)), X], Theta1.T))
    h2 = sigmoid(np.dot(np.c_[np.ones((m, 1)), h1], Theta2.T))
    p = np.argmax(h2, axis=1) + 1
    return p
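# (Added note: argmax returns a 0-based column index, so +1 maps it back to the
# 1..10 labels; in this dataset the digit "0" is stored as label 10.)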
# Accuracy
pred = predict(nn_Thetas[0], nn_Thetas[1], X_data)
print("Training Set Accuracy:", np.mean(pred == y) * 100)
## Visualizing the network
def display_data(X):
    fig = plt.figure(figsize=(5, 5))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)
    for i in range(X.shape[0]):
        ax = fig.add_subplot(5, 5, i + 1, xticks=[], yticks=[])
        # Drop the bias weight in column 0 before reshaping into a 20x20 image
        ax.imshow(X[i, 1:].reshape(20, 20, order="F"), cmap="gray")
    plt.show()

display_data(nn_Thetas[0])  # I never want to write a neural network from scratch again