@koshian2
Created May 8, 2018 17:17
Implementing Coursera Machine Learning in Python - [Week 4] Neural Networks (1) [1] Multi-class classification, implemented from scratch
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
# Load the training data
def load_data1():
    data = loadmat("ex3data1")
    # In the original data y is a 5000x1 matrix, so flatten it to a vector
    return np.array(data['X']), np.ravel(np.array(data['y']))
X_data, y = load_data1()
m = X_data.shape[0]
# Display 100 randomly selected images
np.random.seed(114514)  # comment this out to remove reproducibility
sel = np.arange(m)
np.random.shuffle(sel)
sel = sel[:100]
fig = plt.figure(figsize=(10, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)
for i in range(100):
    ax = fig.add_subplot(10, 10, i + 1, xticks=[], yticks=[])
    ax.imshow(X_data[sel[i]].reshape((20, 20)).T, cmap='gray')
plt.show()
# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
# Cost function for regularized logistic regression
def lr_cost_function(theta, X, y, lambda_):
    m = len(y)
    h_theta = sigmoid(np.dot(X, theta))
    J = np.sum(-y * np.log(h_theta) - (1 - y) * np.log(1 - h_theta)) / m + lambda_ / 2 / m * np.sum(theta[1:] ** 2)
    # Do not regularize theta_0
    grad = np.dot(X.T, h_theta - y) / m
    temp = theta.copy()  # theta[:] is only a view of the ndarray, so copy explicitly to avoid mutating theta
    temp[0] = 0
    grad += lambda_ / m * temp
    return J, grad
# Test the cost function
print("Testing lrCostFunction() with regularization")
theta_t = np.array([-2, -1, 1, 2])
# Note: without specifying order, NumPy's default reshape order is the opposite of Octave's
X_t = np.c_[np.ones(5), np.arange(1, 16).reshape(5, 3, order='F') / 10]
y_t = (np.array([1, 0, 1, 0, 1]) >= 0.5).astype(int)
lambda_t = 3
J, grad = lr_cost_function(theta_t, X_t, y_t, lambda_t)
print("Cost:", J)
print("Expected cost: 2.534819")
print("Gradients:")
print(grad)
print("Expected gradients:")
print(" 0.146561\n -0.548558\n 0.724722\n 1.398003\n")
# Split the logistic regression cost function into separate cost and gradient functions (the combined version is slow)
def lr_cost_function_cost(theta, X, y, lambda_):
    m = len(y)
    h_theta = sigmoid(np.dot(X, theta))
    J = np.sum(-y * np.log(h_theta) - (1 - y) * np.log(1 - h_theta)) / m + lambda_ / 2 / m * np.sum(theta[1:] ** 2)
    return J

def lr_cost_function_grad(theta, X, y, lambda_):
    m = len(y)
    h_theta = sigmoid(np.dot(X, theta))
    grad = np.dot(X.T, h_theta - y) / m
    temp = theta.copy()  # copy so that theta itself is not modified
    temp[0] = 0
    grad += lambda_ / m * temp
    return grad
# Gradient descent (implemented by hand because the built-in optimizer is slow)
def gradient_descent(initial_theta, X, y, lambda_, eta, maxiter=10000, tol=1e-3):
    theta_before = initial_theta
    for i in range(maxiter):
        J, grad = lr_cost_function(theta_before, X, y, lambda_)
        theta = theta_before - eta * grad
        norm = np.linalg.norm(theta - theta_before)
        if i % 100 == 0:
            print("i =", i, ", norm =", norm, ", J =", J)
        if norm < tol:
            print("Converged at iteration", i)
            break
        theta_before = theta
    return theta
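# Alternative optimizer (a sketch, not what this gist actually runs): the split
# cost/grad functions above could also be handed to scipy.optimize.minimize,
# e.g. with L-BFGS-B. The helper name and the options chosen here are assumptions.
from scipy.optimize import minimize

def fit_one_label_scipy(initial_theta, X, y, lambda_):
    res = minimize(lr_cost_function_cost, initial_theta, jac=lr_cost_function_grad,
                   args=(X, y, lambda_), method="L-BFGS-B", options={"maxiter": 400})
    return res.x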
# One-vs-all training
def one_vs_all(X, y, num_labels, lambda_):
    m = X.shape[0]
    n = X.shape[1]
    all_theta = np.zeros((num_labels, n + 1))
    X = np.c_[np.ones(m), X]
    for i in range(num_labels):
        print("One vs all :", i + 1, "/", num_labels)
        initial_theta = np.zeros(n + 1)
        y_param = y == i + 1
        #theta = fmin_ncg(lr_cost_function_cost, initial_theta, fprime=lr_cost_function_grad, args=(X, y_param, lambda_, ), epsilon=1e-12, maxiter=1000, avextol=1e-8, disp=True)
        theta = gradient_descent(initial_theta, X, y_param, lambda_, 1)
        all_theta[i, :] = theta
    return all_theta
num_labels = 10
lambda_ = 0.1
all_theta = one_vs_all(X_data, y, num_labels, lambda_)
print()
# Prediction
def predict_one_vs_all(all_theta, X):
    m = X.shape[0]
    num_labels = all_theta.shape[0]
    XX = np.c_[np.ones(m), X]
    pred_array = sigmoid(np.dot(XX, all_theta.T))
    print(pred_array)
    p = np.argmax(pred_array, axis=1) + 1  # argmax over each row picks the most likely label
    return p
pred = predict_one_vs_all(all_theta, X_data)
print(np.bincount(pred))
print("Training Set Accuracy: ", np.mean(pred == y) * 100)