Skip to content

Instantly share code, notes, and snippets.

@HenryJia
Last active October 11, 2019 15:20
Show Gist options
  • Save HenryJia/b6301c20bd29fce64c8b09c89e74d77c to your computer and use it in GitHub Desktop.
Save HenryJia/b6301c20bd29fce64c8b09c89e74d77c to your computer and use it in GitHub Desktop.
t-distributed Stochastic Neighbour Embedding on MNIST
# Based on https://scipy-lectures.org/packages/scikit-learn/auto_examples/plot_tsne.html
# tSNE: https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
# MNIST Dataset: http://yann.lecun.com/exdb/mnist/
import struct
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
# Fix the random seed so the shuffle further down is reproducible
np.random.seed(1234)

# Load the MNIST Dataset (10k test split) from its raw IDX files.
# Each file starts with a header of big-endian unsigned 32-bit integers,
# followed by the payload as unsigned bytes.
with open('t10k-labels-idx1-ubyte', 'rb') as f_labels:
    # Label header: magic number, number of labels
    magic, n = struct.unpack('>II', f_labels.read(8))
    if magic != 2049:
        # 2049 (0x00000801) marks an IDX1 unsigned-byte label file; anything
        # else usually means the wrong file or one that is still gzipped.
        raise ValueError(
            'Unexpected magic number %d in label file (expected 2049)' % magic
        )
    # Everything after the header is one uint8 label per image
    y = np.fromfile(f_labels, dtype=np.uint8)
if y.shape[0] != n:
    raise ValueError(
        'Label file is truncated or corrupt: header says %d labels, found %d'
        % (n, y.shape[0])
    )

with open('t10k-images-idx3-ubyte', 'rb') as f_img:
    # Image header: magic number, number of images, rows and columns per image
    magic, num, rows, cols = struct.unpack('>IIII', f_img.read(16))
    if magic != 2051:
        # 2051 (0x00000803) marks an IDX3 unsigned-byte image file
        raise ValueError(
            'Unexpected magic number %d in image file (expected 2051)' % magic
        )
    # Flat array of pixel bytes; it is reshaped into one row per image later
    x = np.fromfile(f_img, dtype=np.uint8)
if x.size != num * rows * cols:
    raise ValueError(
        'Image file is truncated or corrupt: header says %d pixels, found %d'
        % (num * rows * cols, x.size)
    )
# Generate a random permutation to shuffle our dataset
perm = np.random.permutation(y.shape[0])
# Take a subsample of 500 images.
# The image dimensions come from the IDX header (num, rows, cols) instead of
# being hard-coded, and only the first 500 shuffled indices are gathered so
# the full image matrix is never copied just to be thrown away.
subset = perm[:500]
x = x.reshape((num, rows * cols))[subset]
y = y[subset]
print(x.shape, y.shape)
# Run tSNE
# Lean on scikit-learn's implementation rather than writing our own.
# The embedding is 2-dimensional and the estimator's seed is pinned.
tsne = TSNE(
    random_state=0,
    n_components=2,
)
x_2d = tsne.fit_transform(x)
# Plot it: one scatter series per digit class, each in its own colour
plt.figure(figsize=(10, 10))
colors = 'r', 'g', 'b', 'c', 'm', 'y', 'k', 'pink', 'orange', 'purple'
for c, label in zip(colors, np.unique(y)):
    # Select points by the actual label value, not by loop position, so the
    # legend stays correct even if some digit is absent from the subsample.
    mask = y == label
    plt.scatter(x_2d[mask, 0], x_2d[mask, 1], c=c, label=label)
plt.legend()
# Save before show(): the figure is written to disk first, then displayed
plt.savefig('tsne_mnist.png')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment