Skip to content

Instantly share code, notes, and snippets.

@koshian2
Created May 25, 2018 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save koshian2/5edc72b54748b40c63e85fca5c0152ee to your computer and use it in GitHub Desktop.
Save koshian2/5edc72b54748b40c63e85fca5c0152ee to your computer and use it in GitHub Desktop.
Passive Aggressive アルゴリズムを使ったSVMのミニバッチ学習もどき
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.decomposition import PCA
# Load the MNIST data set through scikit-learn's mldata fetcher.
# NOTE(review): fetch_mldata is no longer shipped with recent scikit-learn
# releases (fetch_openml is the documented successor) - confirm against the
# installed version before running.
mnist = fetch_mldata('MNIST original')

# Divide the raw values by 255, then divide every row by its own L2 norm so
# that each sample ends up as a unit-length vector.
original_data = mnist["data"] / 255
row_norms = np.linalg.norm(original_data, ord=2, axis=1, keepdims=True)
original_data = original_data / row_norms

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
X, Xtest, y, ytest = train_test_split(
    original_data,
    mnist['target'],
    test_size=0.2,
    random_state=114514,
)
# Passive-Aggressive linear classifier, trained incrementally via partial_fit.
pac = PassiveAggressiveClassifier(C=0.1, shuffle=False)

# Number of full passes over the training set.
iter_count = 5
# Accuracy on the whole training set, recorded once per pass.
train_accuracies = np.zeros(iter_count)
# Number of samples handed to each partial_fit call.
minibatch_size = 32
# Mini-batches per pass; the last one may hold fewer than minibatch_size rows.
n_minibatch = int(np.ceil(len(y) / minibatch_size))
# Accuracy on each mini-batch, measured right after fitting on that batch.
minibatch_accuracies = np.zeros(iter_count * n_minibatch)
# One 2-D point per mini-batch step: a PCA projection of the coefficients,
# kept only for plotting.
minibatch_coefs = np.zeros((len(minibatch_accuracies), 2))
# PCA used to reduce the coefficient matrix to two components.
pca_decomp = PCA(2)
# Sample indices; reshuffled in place at the start of every pass.
train_indices = np.arange(len(y))
# Label set passed to partial_fit (hoisted: it never changes inside the loop).
all_classes = np.arange(10)
np.random.seed(114514)

# NOTE(review): the nesting below was reconstructed from the statement order
# and variable usage. In particular, printing the test accuracy once per pass
# (rather than once after all passes) is an assumption - confirm.
for k in range(iter_count):
    # New random visiting order for this pass.
    np.random.shuffle(train_indices)
    for i in range(n_minibatch):
        # Slice bounds into the shuffled index array; clamp the final batch.
        start_idx = i * minibatch_size
        end_idx = min(start_idx + minibatch_size, len(y))
        minibatch_indices = train_indices[start_idx:end_idx]
        X_minibatch, y_minibatch = X[minibatch_indices, :], y[minibatch_indices]

        # One incremental update on this mini-batch.
        pac.partial_fit(X_minibatch, y_minibatch, classes=all_classes)

        # Accuracy on the very batch that was just used for the update.
        minibatch_score = pac.score(X_minibatch, y_minibatch)
        print(start_idx, "->", end_idx, "Mini-batch Accuracy:", minibatch_score)

        # Flat step counter across all passes.
        step = k * n_minibatch + i
        minibatch_accuracies[step] = minibatch_score
        # Keep the first row of the 2-component projection of coef_.
        # NOTE(review): PCA is refitted on every step, so successive points
        # are expressed in different bases; fitting once on all recorded
        # coefficients may be what was intended - confirm.
        minibatch_coefs[step, :] = pca_decomp.fit_transform(pac.coef_)[0]

    # Score the full training set once per pass and reuse the value for both
    # the stored history and the console output.
    train_score = pac.score(X, y)
    train_accuracies[k] = train_score
    print("Train Accuracy:", train_score)
    print("Test Acuuracy:", pac.score(Xtest, ytest))

# Shapes of the learned parameters.
print(pac.coef_.shape)
print(pac.intercept_.shape)
# Figure 1: accuracy on the full training set after each pass (x starts at 1).
pass_numbers = np.arange(1, iter_count + 1)
plt.plot(pass_numbers, train_accuracies, marker="o")
plt.xlabel("Total iteration")
plt.ylabel("Accuracy")
plt.show()

# Figure 2: accuracy on every mini-batch, in the order they were processed.
step_numbers = np.arange(1, iter_count * n_minibatch + 1)
plt.plot(step_numbers, minibatch_accuracies)
plt.xlabel("Minibatch iteration")
plt.ylabel("Accuracy")
plt.show()

# Figure 3: path traced by the stored 2-D coefficient projections
# (first column on the x axis, second column on the y axis).
first_component, second_component = minibatch_coefs.T
plt.plot(first_component, second_component, linewidth=0.5)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment