Last active
July 20, 2017 19:19
-
-
Save xboard/dd88c9797ac327073af892b6812c4ecf to your computer and use it in GitHub Desktop.
Naive implementation of a Latent SVM classifier.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# An experiment in developing a LATENT SVM classifier.
#
import matplotlib.pyplot as plt
import numpy as np
# Datasets, classifiers and performance metrics.
from sklearn import datasets, svm, metrics, preprocessing

# Fix the RNG so the random latent-class initialization below is reproducible.
np.random.seed(1111)
# The digits dataset.
digits = datasets.load_digits()

# Flatten each 8x8 image into a feature vector so the classifier sees a
# (samples, features) matrix, then center the values around zero.
n_samples = digits.images.shape[0]
data = digits.images.reshape((n_samples, -1))
data = data - data.mean()
n_features = data.shape[1]

# Observed binary target: digit parity. The true digit identity is kept
# aside as the hidden structure we hope the latent classes recover.
y = digits.target % 2
y_latent = digits.target
# Support vector classifier trained over the joint (parity, latent) labels.
clf = svm.SVC(gamma=0.001)

latent_card = 5  # number of hidden sub-classes per parity label
y_card = 2       # observed labels: even / odd

# Randomly assign every sample to a latent sub-class, then encode the joint
# label as 10*y + z (tens digit = parity, units digit = latent class).
Z = np.random.randint(latent_card, size=n_samples)
print("INITIAL Latent class counts: %s" % repr(np.bincount(Z)))
YZ = 10 * y + Z
# ----------------------------------------------------------------------
# Hard-EM style alternation on the training half of the data:
#   1. fit the SVM on the current joint labels,
#   2. re-predict to obtain new latent assignments Z,
#   3. rebuild joint labels keeping the TRUE parity y but the new Z,
# repeated until the training score stops changing.
# ----------------------------------------------------------------------
n_train = n_samples // 2

# Initialization: fit on the random latent assignment.
clf.fit(data[:n_train], YZ[:n_train])
old_score = 0.0
score = clf.score(data, YZ)
print(score)

# Cap the iterations: the score can oscillate between two fixed points,
# in which case np.isclose(score, old_score) would never become true and
# the original loop would spin forever.
max_iter = 100
i = 0
while not np.isclose(score, old_score) and i < max_iter:
    print("==" * 30)
    i += 1
    print("Iteration {}".format(i))
    # E-step: the current model's latent assignment on the training half.
    YZ = clf.predict(data[:n_train])
    Z = YZ % 10
    # M-step labels: keep the true parity, adopt the predicted latent class.
    YZ = 10 * y[:n_train] + Z
    clf.fit(data[:n_train], YZ)
    score, old_score = clf.score(data[:n_train], YZ), score
    print("score={}".format(score))
# Predict joint labels on the held-out second half and decompose them.
YZ = clf.predict(data[n_samples // 2:])
pred_Y = YZ // 10                # tens digit: predicted parity
pred_Z = YZ % 10 + 5 * pred_Y    # fold (parity, latent) into a 0..9 index
expected = y[n_samples // 2:]
expected_latent = y_latent[n_samples // 2:]

print("Classification report for classifier %s:\n%s\n"
      % (clf, metrics.classification_report(expected, pred_Y)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, pred_Y))
print("Latent class counts: %s" % repr(np.bincount(pred_Z)))
# Show a few held-out digits from each of the predicted latent classes.
X_test = data[n_samples // 2:]
h_pred = pred_Z
n_latent_classes = latent_card * y_card
n_examples = 7
print(X_test.shape)
print(pred_Z.shape)

plt.figure(figsize=(3, 5))
plt.suptitle("Example digits from each of\nthe ten latent classes.")
for cls in range(n_latent_classes):
    # Up to n_examples test images currently assigned to this latent class.
    members = X_test[h_pred == cls][:n_examples]
    for col, image in enumerate(members):
        ax_index = n_examples * cls + col + 1  # subplot indices are 1-based
        plt.subplot(n_latent_classes, n_examples, ax_index)
        plt.imshow(image.reshape((8, 8)), cmap=plt.cm.gray_r)
        plt.xticks(())
        plt.yticks(())
plt.subplots_adjust(.02, .04, .96, .88, .12, .18)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment