Created
September 8, 2013 01:49
-
-
Save kpysniak/6481192 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.cluster import MiniBatchKMeans | |
from sklearn.feature_extraction.image import extract_patches_2d | |
from sklearn import datasets | |
import numpy as np | |
import time | |
def getKmeansFitter(kmeans_name):
    """Build an unfitted ``MiniBatchKMeans`` for one reallocation strategy.

    Parameters
    ----------
    kmeans_name : str
        Name of the reallocation strategy. It is forwarded unchanged as the
        ``reallocation_type`` argument and must be one of
        ``'unique_labels'``, ``'nonunique_labels'``,
        ``'unique_uniform_labels'`` or ``'nonunique_uniform_labels'``.

    Returns
    -------
    MiniBatchKMeans
        A new estimator whose ``n_clusters`` is read from the module-level
        ``true_k``; every other setting is the same for all strategies.

    Raises
    ------
    ValueError
        If ``kmeans_name`` is not one of the supported strategies.
    """
    supported = (
        'unique_labels',
        'nonunique_labels',
        'unique_uniform_labels',
        'nonunique_uniform_labels',
    )
    # Validate up front: previously an unknown name fell through every branch
    # and surfaced as an UnboundLocalError on the return statement.
    if kmeans_name not in supported:
        raise ValueError(
            "unknown kmeans_name %r; expected one of: %s"
            % (kmeans_name, ', '.join(supported)))

    # NOTE(review): `reallocation_type` looks like an experimental
    # MiniBatchKMeans argument; presumably this script targets a
    # scikit-learn build that accepts it -- confirm.
    return MiniBatchKMeans(n_clusters=true_k,
                           init='k-means++', n_init=1,
                           init_size=1000, batch_size=1000,
                           verbose=False,
                           reallocation_type=kmeans_name,
                           reassignment_ratio=0.30)
def calculate_image_inertia(faces, kmeans, patch_size, idx):
    """Fit ``kmeans`` incrementally on image patches and time the run.

    Parameters
    ----------
    faces : object
        Must expose an ``images`` iterable; each image is handed to
        ``extract_patches_2d``.
    kmeans : estimator
        Must provide ``partial_fit(data)`` and, afterwards, ``inertia_``.
    patch_size : tuple
        Patch size forwarded to ``extract_patches_2d``.
    idx : int
        Index of the benchmark run. Not used by the computation; kept so
        existing callers keep working.

    Returns
    -------
    tuple
        ``(kmeans.inertia_, elapsed_seconds)`` where the elapsed time covers
        patch extraction, normalisation and all ``partial_fit`` calls.
    """
    t0 = time.time()
    buffer = []
    # The online learning part: cycle over the whole dataset 6 times.
    for _ in range(6):
        for img in faces.images:
            patches = extract_patches_2d(img, patch_size, max_patches=50)
            # Flatten each patch to one row.
            buffer.append(np.reshape(patches, (len(patches), -1)))
            # Fit once the patches of 10 images are collected. The buffer
            # carries over between passes, and a trailing batch of fewer
            # than 10 images is never fitted.
            if len(buffer) < 10:
                continue
            data = np.concatenate(buffer, axis=0)
            buffer = []
            # Standardise every feature column of the batch.
            data -= np.mean(data, axis=0)
            scale = np.std(data, axis=0)
            # A constant column has zero deviation; dividing by it would
            # inject NaN/inf into partial_fit, so leave such columns at 0.
            scale[scale == 0] = 1.0
            data /= scale
            kmeans.partial_fit(data)
    dt = time.time() - t0
    return (kmeans.inertia_, dt)
def print_iter_details(details, name):
    """Print the mean and standard deviation of a set of benchmark runs.

    Parameters
    ----------
    details : array-like of shape (n_runs, 2)
        One row per run. Column 0 is reported first as ``(mean, std)`` and
        column 1 is reported under ``time``; the callers in this file pass
        ``(inertia, seconds)`` pairs.
    name : str
        Label printed at the start of the line.
    """
    # Accept plain lists of tuples as well as ndarrays.
    details = np.asarray(details)
    # One reduction per statistic instead of recomputing it per column.
    means = details.mean(axis=0)
    stds = details.std(axis=0)
    print('%s: (mean, std): (%f, %f) time: (mean, std): (%f, %f)'
          % (name, means[0], stds[0], means[1], stds[1]))
# Benchmark configuration. `true_k` is read as a global by getKmeansFitter.
faces = datasets.fetch_olivetti_faces()
patch_size = (20, 20)
true_k = 100
iters = 100


def _benchmark(variant):
    """Run ``iters`` fresh fits for one strategy, print and return the stats.

    Each run gets a newly built estimator; the result is an array with one
    ``(inertia, seconds)`` row per run.
    """
    runs = [
        calculate_image_inertia(faces, getKmeansFitter(variant),
                                patch_size, run)
        for run in range(iters)
    ]
    results = np.array(runs)
    print_iter_details(results, "km_" + variant)
    return results


# Strategies are benchmarked one after another, each summary printed as soon
# as its runs finish.
km_unique_labels_inertia = _benchmark('unique_labels')
km_nonunique_labels_inertia = _benchmark('nonunique_labels')
km_unique_uniform_labels_inertia = _benchmark('unique_uniform_labels')
km_nonunique_uniform_labels_inertia = _benchmark('nonunique_uniform_labels')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment