Skip to content

Instantly share code, notes, and snippets.

@glemaitre
Created April 19, 2021 17:03
Show Gist options
  • Save glemaitre/e8f2054a7022abc14fbcaa9769131293 to your computer and use it in GitHub Desktop.
Save glemaitre/e8f2054a7022abc14fbcaa9769131293 to your computer and use it in GitHub Desktop.
# %%
# Download OpenML dataset 41082 (the variable name suggests the USPS digits).
from sklearn.datasets import fetch_openml

usps = fetch_openml(data_id=41082)
# %%
# Pull the feature table and the labels out of the returned bunch in one go.
data, target = usps.data, usps.target
# %%
# View the first sample as a 16x16 image (one row holds the 256 pixel values).
import numpy as np

img = data.iloc[0].to_numpy().reshape(16, 16)
# %%
# Quick visual sanity check of that single sample.
import matplotlib.pyplot as plt

plt.imshow(img)
# %%
from sklearn.model_selection import train_test_split

# Two successive stratified draws of 100 samples each. The held-out part
# returned by the first call serves as the training set; the second call
# draws the test set from what is left.
first_split = train_test_split(
    data,
    target,
    test_size=100,
    stratify=target,
    random_state=42,
)
data_rest, data_train, target_rest, target_train = first_split

second_split = train_test_split(
    data_rest,
    target_rest,
    test_size=100,
    stratify=target_rest,
    random_state=42,
)
data_rest, data_test, target_rest, target_test = second_split

# The following cells work on plain NumPy arrays.
data_train = data_train.to_numpy()
data_test = data_test.to_numpy()
# %%
# Show the clean test images on a 10x10 grid, one 16x16 image per axis.
fig, axs = plt.subplots(nrows=10, ncols=10, figsize=(15, 15))
for img, ax in zip(data_test, axs.ravel()):
    ax.imshow(img.reshape((16, 16)), cmap="Greys")
    ax.axis("off")
# Same title size as the other figures in this script.
_ = fig.suptitle("Uncorrupted test dataset", size=26)
# %%
# Corrupt the test images with additive Gaussian noise (std 0.5, fixed seed).
rng = np.random.RandomState(0)
# Draw the noise with the shape of the array it is added to, so the sum
# does not depend on the train and test sets having the same number of rows.
noise = rng.normal(scale=0.5, size=data_test.shape)
data_test_corrupted = data_test + noise
# %%
# Plot the noisy test images; the title reports their mean squared error
# against the clean test set.
corruption_mse = np.mean((data_test - data_test_corrupted) ** 2)
fig, axs = plt.subplots(nrows=10, ncols=10, figsize=(15, 15))
for noisy_image, axis in zip(data_test_corrupted, axs.ravel()):
    axis.imshow(noisy_image.reshape(16, 16), cmap="Greys")
    axis.axis("off")
_ = fig.suptitle(f"Corrupted test data: MSE={corruption_mse:.2f}", size=26)
# %%
from sklearn.decomposition import KernelPCA

# RBF kernel PCA keeping 80 components. fit_inverse_transform=True is what
# makes inverse_transform available later; alpha is the ridge penalty of
# that learned inverse mapping.
kpca = KernelPCA(
    n_components=80,
    kernel="rbf",
    gamma=0.5,
    alpha=10,
    fit_inverse_transform=True,
)
# %%
# Fit on the (uncorrupted) training split.
kpca.fit(data_train)
# %%
import pandas as pd

# Denoise by projecting the *corrupted* test images onto the kernel PCA
# components and mapping them back to pixel space. Reconstructing the clean
# test set instead would not measure any denoising.
data_reconstruct = kpca.inverse_transform(kpca.transform(data_test_corrupted))
# %%
# Plot the kernel PCA reconstructions; the title reports their mean squared
# error against the clean test set.
kpca_mse = np.mean((data_test - data_reconstruct) ** 2)
fig, axs = plt.subplots(nrows=10, ncols=10, figsize=(15, 15))
for restored, axis in zip(data_reconstruct, axs.ravel()):
    axis.imshow(restored.reshape(16, 16), cmap="Greys")
    axis.axis("off")
title = f"Denoising using Kernel PCA with RBF kernel: MSE={kpca_mse:.2f}"
_ = fig.suptitle(title, size=26)
# %%
from sklearn.decomposition import PCA

# Linear PCA with 32 components, fitted on the training split.
pca = PCA(n_components=32)
pca.fit(data_train)
# Project the corrupted test images, then map them back to pixel space.
# NOTE: this rebinds data_reconstruct, replacing the kernel PCA result.
projected = pca.transform(data_test_corrupted)
data_reconstruct = pca.inverse_transform(projected)
# %%
# Plot the PCA reconstructions; the title reports their mean squared error
# against the clean test set.
fig, axs = plt.subplots(nrows=10, ncols=10, figsize=(15, 15))
for img, ax in zip(data_reconstruct, axs.ravel()):
    ax.imshow(img.reshape((16, 16)), cmap="Greys")
    ax.axis("off")
_ = fig.suptitle(
    "Denoising using PCA: "  # was misspelled "Denosing"
    f"MSE={np.mean((data_test - data_reconstruct) ** 2):.2f}",
    size=26,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment