Created
May 10, 2018 17:15
-
-
Save koshian2/790f0ab03e05116957ab02450d3c173d to your computer and use it in GitHub Desktop.
Coursera Machine LearningをPythonで実装 - [Week8]k-Means, 主成分分析(PCA)(4)主成分分析、組み込み
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.decomposition import PCA | |
from scipy.io import loadmat | |
from sklearn.preprocessing import StandardScaler | |
## 1. PCA on 2-D data
# Load the data (same values that were hard-coded in the from-scratch version).
X = np.array(loadmat("ex7data1.mat")['X'])
# Standardize first — PCA is sensitive to feature scale.
# See: http://scikit-learn.org/stable/auto_examples/preprocessing/plot_scaling_importance.html
scaler = StandardScaler()
X_norm = scaler.fit_transform(X)
# PCA (input must already be standardized).
pca = PCA(n_components=1, svd_solver="full")
X_project = pca.fit_transform(X_norm)
X_recover = pca.inverse_transform(X_project)
# Original (standardized) data
print("Original first sample:", X_norm[0, :])
# Dimensionality-reduced representation
print("Projection of the first example:", X_project[0])
# Reconstruction back to the original dimensionality
print("Approximation of the first example:", X_recover[0,:])
# Explained variance ratio (use this to tune n_components; with no reduction [=N] it sums to 1)
print("Explained variance ratio:", pca.explained_variance_ratio_)
print()
## 2. Face images
# Load the faces dataset into a NumPy array.
faces_mat = loadmat("ex7faces.mat")
X = np.array(faces_mat['X'])
# Try plotting the images.
def display_data(X, example_width=None):
    """Render the rows of X as a grid of square grayscale image tiles.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Each row is one flattened square image (column-major / MATLAB order).
    example_width : int, optional
        Side length of each tile in pixels; defaults to sqrt(n).

    NOTE(review): the original default, ``np.sqrt(X.shape[1]).astype(int)``,
    was evaluated once at definition time against the *global* ``X`` — a
    classic early-binding bug. It is now computed per call from the argument,
    which gives the same result for the calls in this script.
    """
    m, n = X.shape
    if example_width is None:
        example_width = int(np.sqrt(n))
    # Arrange the m examples in a near-square grid.
    display_rows = np.floor(np.sqrt(m)).astype(int)
    display_cols = np.ceil(m / display_rows).astype(int)
    fig = plt.figure(figsize=(5, 5))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)
    for i in range(m):
        ax = fig.add_subplot(display_rows, display_cols, i + 1, xticks=[], yticks=[])
        # order="F": images are stored column-major (MATLAB convention).
        ax.imshow(X[i, :].reshape(example_width, example_width, order="F"), cmap="gray")
# Originally 1024-dimensional images
display_data(X[:100, :])
plt.suptitle("Original faces")
plt.show()
# Standardize before PCA.
scaler = StandardScaler()
X_norm = scaler.fit_transform(X)
# Reduce to 100 dimensions with PCA.
pca = PCA(n_components=100)
Z = pca.fit_transform(X_norm)
print("Faces data : PCA 1024dim -> 100dim, Explained variance ratio:", np.sum(pca.explained_variance_ratio_))
# Plot the first 36 principal components (eigenvectors).
display_data(pca.components_[:36, :])
plt.suptitle("Eigen vectors")
plt.show()
# Reconstruct the images from the 100-dimensional representation.
X_rec = pca.inverse_transform(Z)
display_data(X_rec[:100,])
plt.suptitle("Recovered faces")
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment