Created
July 23, 2020 20:26
-
-
Save danielmk/3fce176e6df2ea85e0ba163472ab09ab to your computer and use it in GitHub Desktop.
Animation showing MNIST samples reconstructed from an increasing number of principal components.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
The functions in this code are taken from a Neuromatch Academy Tutorial about dimensionality | |
reduction by Alex Cayco Gajic & John Murray: | |
https://github.com/NeuromatchAcademy/course-content/tree/master/tutorials/W1D5_DimensionalityReduction | |
Neuromatch Academy content is licensed under a Creative Commons Attribution 4.0 International | |
https://github.com/NeuromatchAcademy/course-content/blob/master/LICENSE.md | |
I use these functions to create an animation showing a subset of MNIST samples | |
reconstructed from an increasing number of principal components. | |
@author: Daniel Müller-Komorowska | |
""" | |
import numpy as np | |
from sklearn.decomposition import PCA | |
from sklearn.datasets import fetch_openml | |
import matplotlib.pyplot as plt | |
import cv2 | |
from matplotlib.animation import FuncAnimation | |
import matplotlib as mpl | |
# Load the MNIST data as supplied by sklearn | |
mnist = fetch_openml(name='mnist_784') | |
X = mnist.data | |
"""The following function definitions are all from Neuromatch Academy""" | |
def change_of_basis(X, W): | |
""" | |
Projects data onto a new basis. | |
Args: | |
X (numpy array of floats) : Data matrix each column corresponding to a | |
different random variable | |
W (numpy array of floats) : new orthonormal basis columns correspond to | |
basis vectors | |
Returns: | |
(numpy array of floats) : Data matrix expressed in new basis | |
""" | |
Y = np.matmul(X, W) | |
return Y | |
def get_sample_cov_matrix(X): | |
""" | |
Returns the sample covariance matrix of data X. | |
Args: | |
X (numpy array of floats) : Data matrix each column corresponds to a | |
different random variable | |
Returns: | |
(numpy array of floats) : Covariance matrix | |
""" | |
X = X - np.mean(X, 0) | |
cov_matrix = 1 / X.shape[0] * np.matmul(X.T, X) | |
return cov_matrix | |
def sort_evals_descending(evals, evectors): | |
""" | |
Sorts eigenvalues and eigenvectors in decreasing order. Also aligns first two | |
eigenvectors to be in first two quadrants (if 2D). | |
Args: | |
evals (numpy array of floats) : Vector of eigenvalues | |
evectors (numpy array of floats) : Corresponding matrix of eigenvectors | |
each column corresponds to a different | |
eigenvalue | |
Returns: | |
(numpy array of floats) : Vector of eigenvalues after sorting | |
(numpy array of floats) : Matrix of eigenvectors after sorting | |
""" | |
index = np.flip(np.argsort(evals)) | |
evals = evals[index] | |
evectors = evectors[:, index] | |
if evals.shape[0] == 2: | |
if np.arccos(np.matmul(evectors[:, 0], | |
1 / np.sqrt(2) * np.array([1, 1]))) > np.pi / 2: | |
evectors[:, 0] = -evectors[:, 0] | |
if np.arccos(np.matmul(evectors[:, 1], | |
1 / np.sqrt(2)*np.array([-1, 1]))) > np.pi / 2: | |
evectors[:, 1] = -evectors[:, 1] | |
return evals, evectors | |
def pca(X): | |
""" | |
Performs PCA on multivariate data. Eigenvalues are sorted in decreasing order | |
Args: | |
X (numpy array of floats) : Data matrix each column corresponds to a | |
different random variable | |
Returns: | |
(numpy array of floats) : Data projected onto the new basis | |
(numpy array of floats) : Vector of eigenvalues | |
(numpy array of floats) : Corresponding matrix of eigenvectors | |
""" | |
X = X - np.mean(X, 0) | |
cov_matrix = get_sample_cov_matrix(X) | |
evals, evectors = np.linalg.eigh(cov_matrix) | |
evals, evectors = sort_evals_descending(evals, evectors) | |
score = change_of_basis(X, evectors) | |
return score, evectors, evals | |
def reconstruct_data(score, evectors, X_mean, K): | |
""" | |
Reconstruct the data based on the top K components. | |
Args: | |
score (numpy array of floats) : Score matrix | |
evectors (numpy array of floats) : Matrix of eigenvectors | |
X_mean (numpy array of floats) : Vector corresponding to data mean | |
K (scalar) : Number of components to include | |
Returns: | |
(numpy array of floats) : Matrix of reconstructed data | |
""" | |
# Reconstruct the data from the score and eigenvectors | |
X_reconstructed = np.matmul(score[:, :K], evectors[:, :K].T) + X_mean | |
return X_reconstructed | |
# Choose the MNIST samplest to animate. I manually chose those | |
choices = np.array([0, 5, 12, 19, 20, 25, 31, 32, 34], dtype=np.int) | |
# Calculate feature mean. pca() subtracts those. | |
X_mean = X.mean(axis=0) | |
# Run the pca | |
score, evectors, evals = pca(X) | |
# Reconstruct the data with increasing number of principal components | |
# Save only the sample choices to be animated later | |
rec_list = [] | |
for x in range(0,X.shape[1]+1): | |
rec_list.append(reconstruct_data(score,evectors, X_mean, K=x)[choices,:].copy()) | |
# Generate a single image from the nine samples | |
# There might be a better way to do this but cv2 does the job | |
images = [] | |
for i in rec_list: | |
x = [] | |
for ii in i: | |
x.append(ii.reshape(28,28)) | |
curr_img = cv2.hconcat([cv2.vconcat([x[0], x[1], x[2]]), | |
cv2.vconcat([x[3], x[4], x[5]]), | |
cv2.vconcat([x[6], x[7], x[8]])]) | |
images.append(curr_img) | |
# Increase default font size | |
mpl.rcParams['font.size'] = 16 | |
# Create figure and axes for plotting | |
fig, ax = plt.subplots(1) | |
# This function is passed to FunAnimation. It update the plot for each frame i | |
def update_plot(i): | |
ax.clear() | |
ax.axis('off') | |
ax.imshow(images[i], 'gray') | |
ax.set_title("Principal Components: " + str(i).zfill(3)) | |
# Create animation | |
anim = FuncAnimation(fig, | |
update_plot, | |
frames=np.arange(0, len(images))) | |
# Save the animation | |
anim.save('pca_reconstruction6.mp4', | |
fps=30, | |
dpi=150, | |
writer='ffmpeg') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment