@koshian2
Created May 8, 2018 17:26
Implementing Coursera Machine Learning in Python - [Week 5] Neural Networks (2) [1] Implementing it myself
import numpy as np
import copy
import matplotlib.pyplot as plt
from scipy.io import loadmat
## Load the data
def load_data1():
    data = loadmat("ex4data1")
    return np.array(data['X']), np.ravel(np.array(data['y']))

X_data, y = load_data1()
m = X_data.shape[0]
# Load the provided provisional values for Theta1 and Theta2
def load_weights():
    data = loadmat("ex4weights")
    return np.array(data['Theta1']), np.array(data['Theta2'])

Theta1, Theta2 = load_weights()
# Define constants
INPUT_LAYER_SIZE = 400
HIDDEN_LAYER_SIZE = 25
NUM_LABELS = 10
## Function definitions
# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Derivative of the sigmoid function
def sigmoid_gradient(z):
    g = sigmoid(z)
    return g * (1 - g)
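# Added sanity check (not in the original gist): the sigmoid gradient peaks at
# z = 0 with value 0.25, and scipy.special.expit is a numerically stable
# equivalent of the sigmoid above (np.exp(-z) can overflow for very negative z).
from scipy.special import expit
assert np.isclose(sigmoid_gradient(0), 0.25)
assert np.allclose(sigmoid(np.array([-30.0, 0.0, 30.0])), expit(np.array([-30.0, 0.0, 30.0])))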
# Cost function
def nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_):
    # The parameters are passed as a tuple (Theta1, Theta2) rather than unrolled into one vector
    Theta1 = nn_params[0]
    Theta2 = nn_params[1]
    m = X.shape[0]
    # y holds labels 1-10, so convert it into one boolean column per classifier
    # (a vectorized alternative is sketched after this function)
    Y = np.zeros((m, num_labels))
    for i in range(num_labels):
        Y[:, i] = (y == (i + 1))
    ## Forward propagation
    # Add the bias unit to the input layer
    A1 = np.c_[np.ones((m, 1)), X]
    # Compute the second-layer pre-activations and activations
    Z2 = np.dot(A1, Theta1.T)
    A2 = sigmoid(Z2)
    # Add the bias unit to the second layer
    A2 = np.c_[np.ones((A2.shape[0], 1)), A2]
    # Hypothesis (predicted values)
    h_theta = sigmoid(np.dot(A2, Theta2.T))
    # Compute the cost
    J = np.sum(np.sum(-Y * np.log(h_theta) - (1 - Y) * np.log(1 - h_theta))) / m
    J += lambda_ / 2 / m * (np.sum(np.sum(Theta1[:, 1:] ** 2)) + np.sum(np.sum(Theta2[:, 1:] ** 2)))
    ## Back propagation
    # delta^(3) = prediction - training labels Y
    delta_3 = h_theta - Y
    # Compute delta^(2)
    delta_2 = np.dot(delta_3, Theta2) * np.c_[np.zeros((m, 1)), sigmoid_gradient(Z2)]
    # Drop the bias-unit column from delta^(2)
    delta_2 = delta_2[:, 1:]
    # Accumulate the errors into Delta^(1) and Delta^(2)
    Delta2 = np.dot(delta_3.T, A2)
    Delta1 = np.dot(delta_2.T, A1)
    # Gradient matrices without the regularization term
    Theta2_grad = Delta2 / m
    Theta1_grad = Delta1 / m
    ## Add the regularization term to the gradients
    if lambda_ > 0:
        # NumPy array assignment only copies a reference, so make a deep copy
        temp = copy.deepcopy(Theta1)
        temp[:, 0] = 0
        Theta1_grad += temp * (lambda_ / m)
        # Add the same term for Theta2
        temp = copy.deepcopy(Theta2)
        temp[:, 0] = 0
        Theta2_grad += temp * (lambda_ / m)
    # Return J and the tuple (Theta1_grad, Theta2_grad)
    return J, (Theta1_grad, Theta2_grad)
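# Added sketch (not in the original gist): the label-to-one-hot loop inside the
# cost function can also be written in vectorized form. The name one_hot is an
# illustrative assumption, not part of the original code.
def one_hot(labels, num_labels):
    # labels take values 1..num_labels; row i gets a 1 in column labels[i] - 1
    return (labels[:, np.newaxis] == np.arange(1, num_labels + 1)).astype(float)

assert np.array_equal(one_hot(np.array([1, 3, 2]), 3),
                      np.array([[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]]))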
# Set random initial values for the weights
def rand_initialize_weights(L_in, L_out):
    # For the weights in Theta, L_in = number of units in the layer the
    # connections come from (in forward propagation), L_out = number of units
    # in the layer they go to
    epsilon_init = 0.12
    # epsilon_init is set ad hoc here; epsilon_init = sqrt(6) / sqrt(L_in + L_out)
    # is said to be a good choice (spelled out in the sketch below). Working
    # backwards from epsilon_init = 0.12 gives L_in + L_out = 416.7, which roughly
    # matches the input layer (401) -> hidden layer (25)
    return np.random.uniform(-epsilon_init, epsilon_init, (L_out, 1 + L_in))
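# Added sketch (not in the original gist): the heuristic mentioned above,
# epsilon_init = sqrt(6) / sqrt(L_in + L_out), written out explicitly.
def recommended_epsilon(L_in, L_out):
    # Symmetry-breaking range suggested for the given layer sizes
    return np.sqrt(6) / np.sqrt(L_in + L_out)

# For the 400-unit input layer feeding the 25-unit hidden layer this gives about
# 0.119, close to the ad hoc 0.12 used above:
# print(recommended_epsilon(INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE))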
## Test the functions
# Evaluate with the pre-loaded Theta1 and Theta2, using lambda = 0 (no regularization)
lambda_ = 0
J, grad = nn_cost_function((Theta1, Theta2), INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE, NUM_LABELS, X_data, y, lambda_)
print("Feedforward Using Neural Network ...")
print("Cost at parameters (loaded from ex4weights):", J)
print("(this value should be about 0.287629)\n")
# Regularize with lambda = 1
lambda_ = 1
J, grad = nn_cost_function((Theta1, Theta2), INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE, NUM_LABELS, X_data, y, lambda_)
print("Checking Cost Function (w/ Regularization) ...")
print("Cost at parameters (loaded from ex4weights):", J)
print("(this value should be about 0.383770)\n")
## Test the sigmoid derivative
g = sigmoid_gradient(np.array([-1, -0.5, 0, 0.5, 1]))
print("Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:")
print(g)
print()
## Gradient checking (debugging back propagation)
# Fixed initial values for debugging
def debug_initialize_weights(fan_out, fan_in):
    W = np.zeros((fan_out, 1 + fan_in))
    W = np.sin(np.arange(1, np.size(W) + 1)).reshape(W.shape, order='F') / 10
    return W
# Compute J(theta +/- e) numerically for gradient checking (very slow)
def compute_numerical_gradient(cost_wrap_func, theta):
    Theta1 = theta[0]
    Theta2 = theta[1]
    perturb1 = np.zeros(Theta1.shape)
    perturb2 = np.zeros(Theta2.shape)
    numgrad = (np.zeros(Theta1.shape), np.zeros(Theta2.shape))
    e = 1e-4
    # Two-sided (central) difference of the cost (a scalar example follows after this function)
    calc_cost = lambda: (cost_wrap_func((Theta1 + perturb1, Theta2 + perturb2))
                         - cost_wrap_func((Theta1 - perturb1, Theta2 - perturb2))) / 2 / e
    for p in range(np.size(Theta1)):
        index = np.unravel_index(p, Theta1.shape, order="F")
        perturb1[index] = e
        numgrad[0][index] = calc_cost()
        perturb1[index] = 0
    for p in range(np.size(Theta2)):
        index = np.unravel_index(p, Theta2.shape, order="F")
        perturb2[index] = e
        numgrad[1][index] = calc_cost()
        perturb2[index] = 0
    return numgrad
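# Added sketch (not in the original gist): the same central-difference idea on a
# scalar function, to show what the routine above approximates. For f(x) = sin(x)
# the numerical derivative should be close to cos(x).
x0, e_demo = 0.7, 1e-4
numeric_deriv = (np.sin(x0 + e_demo) - np.sin(x0 - e_demo)) / (2 * e_demo)
assert np.isclose(numeric_deriv, np.cos(x0), atol=1e-7)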
# Gradient checking routine
def check_nn_gradients(lambda_):
    # Parameters for a small debug network
    input_layer_size, hidden_layer_size, num_labels, m = 3, 5, 3, 5
    # Give Theta1 and Theta2 fixed debug initial values
    Theta1 = debug_initialize_weights(hidden_layer_size, input_layer_size)
    Theta2 = debug_initialize_weights(num_labels, hidden_layer_size)
    # X also gets fixed debug values
    X = debug_initialize_weights(m, input_layer_size - 1)
    # Generate y arbitrarily with the mod function
    y = 1 + np.mod(np.arange(1, m + 1), num_labels)
    # Compute cost and gradient with the cost function
    cost, grad = nn_cost_function((Theta1, Theta2), input_layer_size, hidden_layer_size, num_labels, X, y, lambda_)
    cost_wrap = lambda Thetas: nn_cost_function(Thetas, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_)[0]
    numgrad = compute_numerical_gradient(cost_wrap, (Theta1, Theta2))
    # Compare back propagation against the numerical gradient
    # Flatten the results into vectors
    result_grad = np.append(np.ravel(grad[0], order="F"), np.ravel(grad[1], order="F"))
    result_numgrad = np.append(np.ravel(numgrad[0], order="F"), np.ravel(numgrad[1], order="F"))
    for g, n in zip(result_grad, result_numgrad):
        print(n, g)
    print("The above two columns you get should be very similar.")
    print("(Left-Your Numerical Gradient, Right-Analytical Gradient)\n")
    ## Relative difference (a toy example follows after this function)
    diff = np.linalg.norm(result_numgrad - result_grad) / np.linalg.norm(result_numgrad + result_grad)
    print("If your backpropagation implementation is correct, then")
    print("the relative difference will be small (less than 1e-9).")
    print("Relative Difference: ", diff)
# Gradient checking with lambda = 0
print("Checking Backpropagation...")
check_nn_gradients(0)
print()
# Gradient checking with lambda = 3
print("Checking Backpropagation (w/ Regularization) ... ")
lambda_ = 3
check_nn_gradients(lambda_)
print()
# Likewise, debug the cost function with the pre-loaded weights and lambda = 3
debug_J, debug_grad = nn_cost_function((Theta1, Theta2), INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE, NUM_LABELS, X_data, y, lambda_)
print("Cost at (fixed) debugging parameters (w/ lambda =", lambda_, ") :", debug_J)
print("(for lambda = 3, this value should be about 0.576051)\n")
## Train the neural network
import sys, time

# Gradient descent
def gradient_descent(initial_all_theta, cost_wrap, eta, maxiter=100):
    theta_before = initial_all_theta
    for i in range(maxiter):
        J, grad = cost_wrap(theta_before)
        theta = copy.deepcopy(theta_before)
        for j in range(len(initial_all_theta)):
            theta[j] = theta[j] - eta * grad[j]
        mes = f"iter = {i}, J = {J}"
        theta_before = theta
        # Overwrite the same console line so the output does not flood the console
        sys.stdout.write("\r%s" % mes)
        sys.stdout.flush()
        time.sleep(0.01)
    return theta, J
# Initial values for Theta
initial_Theta1 = rand_initialize_weights(INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE)
initial_Theta2 = rand_initialize_weights(HIDDEN_LAYER_SIZE, NUM_LABELS)
# Set regularization lambda = 1 (try varying this)
lambda_ = 1
# Train the neural network (an alternative with scipy.optimize.minimize is sketched after this block)
print("Training Neural Network...")
nn_cost_wrap = lambda Theta: nn_cost_function(Theta, INPUT_LAYER_SIZE, HIDDEN_LAYER_SIZE, NUM_LABELS, X_data, y, lambda_)
nn_Thetas, cost = gradient_descent([initial_Theta1, initial_Theta2], nn_cost_wrap, 2, 500)
print()
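# Added sketch (not in the original gist): an alternative training path using
# scipy.optimize.minimize with the parameters unrolled into a single vector, as
# the Octave exercises do. The helpers unroll, reroll and cost_and_grad are
# illustrative names, and the call is left commented out so the behaviour of
# this script is unchanged.
from scipy.optimize import minimize

def unroll(thetas):
    # Concatenate every parameter matrix into one flat vector
    return np.concatenate([np.ravel(t) for t in thetas])

def reroll(vec, shapes):
    # Split the flat vector back into matrices of the given shapes
    out, start = [], 0
    for shape in shapes:
        size = int(np.prod(shape))
        out.append(vec[start:start + size].reshape(shape))
        start += size
    return tuple(out)

def cost_and_grad(vec, shapes):
    # Wrap nn_cost_function so that it takes and returns flat vectors
    J, grads = nn_cost_function(reroll(vec, shapes), INPUT_LAYER_SIZE,
                                HIDDEN_LAYER_SIZE, NUM_LABELS, X_data, y, lambda_)
    return J, unroll(grads)

# shapes = (initial_Theta1.shape, initial_Theta2.shape)
# res = minimize(cost_and_grad, unroll((initial_Theta1, initial_Theta2)),
#                args=(shapes,), jac=True, method="CG", options={"maxiter": 100})
# trained_Thetas = reroll(res.x, shapes)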
## Prediction and accuracy
def predict(Theta1, Theta2, X):
    m = X.shape[0]
    num_labels = Theta2.shape[0]
    h1 = sigmoid(np.dot(np.c_[np.ones((m, 1)), X], Theta1.T))
    h2 = sigmoid(np.dot(np.c_[np.ones((m, 1)), h1], Theta2.T))
    p = np.argmax(h2, axis=1) + 1
    return p
# Accuracy
pred = predict(nn_Thetas[0], nn_Thetas[1], X_data)
print("Training Set Accuracy:", np.mean(pred == y) * 100)
## Visualize the network
def display_data(X):
    fig = plt.figure(figsize=(5, 5))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)
    for i in range(X.shape[0]):
        ax = fig.add_subplot(5, 5, i + 1, xticks=[], yticks=[])
        # Drop the bias weight in column 0 and show the remaining 400 weights as a 20x20 image
        ax.imshow(X[i, 1:].reshape(20, 20, order="F"), cmap="gray")
    plt.show()

display_data(nn_Thetas[0])  # I never want to write a neural network by hand again