Created
May 25, 2018 14:36
-
-
Save koshian2/5edc72b54748b40c63e85fca5c0152ee to your computer and use it in GitHub Desktop.
Passive Aggressive アルゴリズムを使ったSVMのミニバッチ学習もどき
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pseudo mini-batch training of a linear SVM-like classifier on MNIST using
# scikit-learn's PassiveAggressiveClassifier and partial_fit.
#
# For each epoch the training indices are shuffled, the model is updated one
# mini-batch at a time, and the accuracy on that mini-batch plus a 2-D PCA
# projection of the coefficients are recorded for plotting.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.decomposition import PCA

# Load the data.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed
# in 0.22; on newer versions this must be ported to
# fetch_openml('mnist_784', ...), whose labels are strings -- confirm dtype
# handling before switching.
mnist = fetch_mldata('MNIST original')
# Divide by 255.
original_data = mnist["data"] / 255
# Normalize every sample (row) so that its L2 norm is 1.
row_norms = np.linalg.norm(original_data, ord=2, axis=1)
original_data = original_data / row_norms[:, np.newaxis]
# Train / test split.
X, Xtest, y, ytest = train_test_split(
    original_data, mnist['target'], test_size=0.2, random_state=114514)

# shuffle=False: the sample order is controlled by the explicit index
# shuffling done at the start of every epoch below.
pac = PassiveAggressiveClassifier(C=0.1, shuffle=False)

# Number of passes (epochs) over the training data.
iter_count = 5
# Training accuracy after each epoch.
train_accuracies = np.zeros(iter_count)
# Mini-batch size. Every batch computation below derives from this value
# (previously the literal 32 was repeated, so changing this had no effect).
minibatch_size = 32
n_samples = len(y)
# Number of mini-batches per epoch (the last one may be shorter).
n_minibatch = int(np.ceil(n_samples / minibatch_size))
# Accuracy on each mini-batch, over all epochs.
minibatch_accuracies = np.zeros(iter_count * n_minibatch)
# Coefficients after each mini-batch, reduced to 2-D by PCA for plotting.
minibatch_coefs = np.zeros((minibatch_accuracies.shape[0], 2))
# PCA used only for plotting.
pca_decomp = PCA(2)
# Indices of the training samples; shuffled in place every epoch.
train_indices = np.arange(n_samples)
# Class labels passed to partial_fit (loop-invariant, so built once).
classes = np.arange(10)

np.random.seed(114514)
for k in range(iter_count):
    # Shuffle the sample order for this epoch.
    np.random.shuffle(train_indices)
    for i in range(n_minibatch):
        # Index range of this mini-batch within the shuffled order.
        start_idx = i * minibatch_size
        end_idx = min((i + 1) * minibatch_size, n_samples)
        minibatch_indices = train_indices[start_idx:end_idx]
        X_minibatch = X[minibatch_indices, :]
        y_minibatch = y[minibatch_indices]
        # Update the model with this mini-batch.
        pac.partial_fit(X_minibatch, y_minibatch, classes=classes)
        # Accuracy on the mini-batch that was just used for the update.
        minibatch_score = pac.score(X_minibatch, y_minibatch)
        print(start_idx, "->", end_idx, "Mini-batch Accuracy:", minibatch_score)
        # Flat position of this step across all epochs.
        step = k * n_minibatch + i
        minibatch_accuracies[step] = minibatch_score
        # Keep the 2-D projection of the first row of coef_.
        # NOTE(review): PCA is refit on every step, so the projection axes
        # can differ from step to step; the plotted path is not drawn in a
        # single fixed basis -- confirm this is intended.
        minibatch_coefs[step, :] = pca_decomp.fit_transform(pac.coef_)[0]
    # Accuracy on the full training set after this epoch; computed once and
    # reused for both the record and the log line.
    # NOTE(review): reporting train/test accuracy once per epoch is assumed
    # from the use of k here -- confirm against the original layout.
    train_score = pac.score(X, y)
    train_accuracies[k] = train_score
    print("Train Accuracy:", train_score)
    print("Test Acuuracy:", pac.score(Xtest, ytest))

print(pac.coef_.shape)
print(pac.intercept_.shape)

# Training accuracy per epoch.
plt.plot(np.arange(iter_count) + 1, train_accuracies, marker="o")
plt.xlabel("Total iteration")
plt.ylabel("Accuracy")
plt.show()

# Mini-batch accuracy per update step.
plt.plot(np.arange(iter_count * n_minibatch) + 1, minibatch_accuracies)
plt.xlabel("Minibatch iteration")
plt.ylabel("Accuracy")
plt.show()

# Path of the PCA-projected coefficients over all update steps.
plt.plot(minibatch_coefs[:, 0], minibatch_coefs[:, 1], linewidth=0.5)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment