Coursera Machine Learning implemented in Python - [Week 8] k-Means, Principal Component Analysis (PCA) (4): PCA with the built-in library
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from scipy.io import loadmat
from sklearn.preprocessing import StandardScaler
## 1. PCA on two-dimensional data
# Load the data (same values as the ones hard-coded in the from-scratch implementation)
X = np.array(loadmat("ex7data1.mat")['X'])
# Standardize
# Reference: http://scikit-learn.org/stable/auto_examples/preprocessing/plot_scaling_importance.html
scaler = StandardScaler()
X_norm = scaler.fit_transform(X)
# PCA (standardize the data beforehand)
pca = PCA(n_components=1, svd_solver="full")
X_project = pca.fit_transform(X_norm)
X_recover = pca.inverse_transform(X_project)
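# Sanity check (a sketch, not part of the original exercise): scikit-learn's PCA
# centers the data internally, so transform/inverse_transform are equivalent to
# Z = (X - mean_) @ components_.T and X_rec = Z @ components_ + mean_
Z_manual = (X_norm - pca.mean_) @ pca.components_.T
X_rec_manual = Z_manual @ pca.components_ + pca.mean_
assert np.allclose(Z_manual, X_project)
assert np.allclose(X_rec_manual, X_recover)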
# Original data
print("Original first sample:", X_norm[0, :])
# Dimensionality reduction with PCA
print("Projection of the first example:", X_project[0])
# Map the reduced data back to the original dimensionality
print("Approximation of the first example:", X_recover[0,:])
# Explained variance ratio (tune n_components with this; it sums to 1 with no reduction [n_components = N])
print("Explained variance ratio:", pca.explained_variance_ratio_)
print()
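# Aside (a sketch beyond the original exercise): instead of a fixed n_components,
# you can pass a float in (0, 1); with svd_solver="full", scikit-learn keeps the
# smallest number of components whose cumulative explained variance reaches it
pca_95 = PCA(n_components=0.95, svd_solver="full")
pca_95.fit(X_norm)
print("Components needed for 95% variance:", pca_95.n_components_)
print()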
## 2. Face images
X = np.array(loadmat("ex7faces.mat")['X'])
# Plot the data
def display_data(X, example_width=None):
    # Assume square images by default (computed inside the function rather than
    # as a default argument, which would be evaluated once against the global X)
    if example_width is None:
        example_width = int(np.sqrt(X.shape[1]))
    m, n = X.shape
    display_rows = np.floor(np.sqrt(m)).astype(int)
    display_cols = np.ceil(m / display_rows).astype(int)
    fig = plt.figure(figsize=(5, 5))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)
    for i in range(m):
        ax = fig.add_subplot(display_rows, display_cols, i + 1, xticks=[], yticks=[])
        ax.imshow(X[i, :].reshape(example_width, example_width, order="F"), cmap="gray")
# The original data is 1024-dimensional
display_data(X[:100, :])
plt.suptitle("Original faces")
plt.show()
# Standardize
scaler = StandardScaler()
X_norm = scaler.fit_transform(X)
# Reduce to 100 dimensions with PCA
pca = PCA(n_components=100)
Z = pca.fit_transform(X_norm)
print("Faces data : PCA 1024dim -> 100dim, Explained varience ratio:", np.sum(pca.explained_variance_ratio_))
# Plot the first 36 eigenvectors (principal components)
display_data(pca.components_[:36, :])
plt.suptitle("Eigen vectors")
plt.show()
# Reconstruct the faces from the 100-dimensional representation
X_rec = pca.inverse_transform(Z)
display_data(X_rec[:100, :])
plt.suptitle("Recovered faces")
plt.show()
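# Extra check (a sketch, not in the original): reconstruction error in the
# standardized space, and mapping back to the original pixel scale with the
# scaler fitted above
mse = np.mean((X_norm - X_rec) ** 2)
print("Reconstruction MSE (standardized space):", mse)
X_rec_pixels = scaler.inverse_transform(X_rec)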