Skip to content

Instantly share code, notes, and snippets.

@kpysniak
Created September 8, 2013 01:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kpysniak/6481192 to your computer and use it in GitHub Desktop.
Save kpysniak/6481192 to your computer and use it in GitHub Desktop.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import MiniBatchKMeans
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn import datasets
import numpy as np
import time
# Reallocation strategies this benchmark compares; each value is forwarded
# unchanged as MiniBatchKMeans' ``reallocation_type`` argument.
_REALLOCATION_TYPES = (
    'unique_labels',
    'nonunique_labels',
    'unique_uniform_labels',
    'nonunique_uniform_labels',
)


def getKmeansFitter(kmeans_name):
    """Return a new, unfitted MiniBatchKMeans for one reallocation strategy.

    Every strategy shares the same hyper-parameters; only
    ``reallocation_type`` differs, and it is set to ``kmeans_name``.
    The number of clusters is read from the module-level ``true_k``.

    Parameters
    ----------
    kmeans_name : str
        One of 'unique_labels', 'nonunique_labels',
        'unique_uniform_labels' or 'nonunique_uniform_labels'.

    Returns
    -------
    MiniBatchKMeans
        A freshly constructed estimator.

    Raises
    ------
    ValueError
        If ``kmeans_name`` is not one of the supported strategies.
        (Previously this surfaced as an UnboundLocalError on ``return``.)
    """
    # Validate first so a typo fails with a clear message instead of falling
    # through every branch and returning an unassigned local.
    if kmeans_name not in _REALLOCATION_TYPES:
        raise ValueError(
            'unknown kmeans_name %r; expected one of %s'
            % (kmeans_name, ', '.join(_REALLOCATION_TYPES)))

    # NOTE(review): ``reallocation_type`` looks like an experimental keyword;
    # confirm the installed scikit-learn build accepts it.
    return MiniBatchKMeans(n_clusters=true_k,
                           init='k-means++', n_init=1,
                           init_size=1000, batch_size=1000,
                           verbose=False,
                           reallocation_type=kmeans_name,
                           reassignment_ratio=0.30)
def calculate_image_inertia(faces, kmeans, patch_size, idx,
                            n_passes=6, images_per_batch=10, max_patches=50):
    """Fit ``kmeans`` online on image patches and report inertia and runtime.

    The images in ``faces.images`` are cycled ``n_passes`` times. Patches
    extracted from each image are buffered; after every
    ``images_per_batch`` images the buffer is concatenated, standardised
    per feature, and passed to ``kmeans.partial_fit``. Images left in the
    buffer when the loops end (fewer than a full batch) are not fitted.

    Parameters
    ----------
    faces : object
        Must expose an ``images`` iterable of 2-D arrays.
    kmeans : object
        Estimator exposing ``partial_fit(data)`` and ``inertia_``.
    patch_size : tuple
        Patch dimensions passed to ``extract_patches_2d``.
    idx : int
        Run index. Currently unused; kept so existing callers keep working.
    n_passes : int, optional
        Number of passes over ``faces.images`` (default 6).
    images_per_batch : int, optional
        Number of images whose patches form one ``partial_fit`` batch
        (default 10).
    max_patches : int, optional
        Forwarded to ``extract_patches_2d`` as ``max_patches`` (default 50).

    Returns
    -------
    tuple
        ``(kmeans.inertia_, elapsed_seconds)``.
    """
    t0 = time.time()

    pending = []      # per-image patch matrices awaiting the next fit
    images_seen = 0
    for _ in range(n_passes):
        for img in faces.images:
            patches = extract_patches_2d(img, patch_size,
                                         max_patches=max_patches)
            # Flatten every patch into one row.
            pending.append(np.reshape(patches, (len(patches), -1)))
            images_seen += 1

            if images_seen % images_per_batch == 0:
                batch = np.concatenate(pending, axis=0)
                batch -= np.mean(batch, axis=0)
                scale = np.std(batch, axis=0)
                # A constant feature has zero std; dividing by it would put
                # NaN/inf into the batch. The centred column is already all
                # zeros, so dividing by 1 leaves it untouched.
                scale[scale == 0] = 1.0
                batch /= scale
                kmeans.partial_fit(batch)
                pending = []

    dt = time.time() - t0
    return (kmeans.inertia_, dt)
def print_iter_details(details, name):
    """Print the column-wise mean and std of a table of benchmark runs.

    ``details`` is reduced along axis 0. Column 0 is reported first and
    column 1 is reported under the "time" heading, both labelled ``name``.
    """
    column_means = details.mean(axis=0)
    column_stds = details.std(axis=0)
    summary = (name,
               column_means[0], column_stds[0],
               column_means[1], column_stds[1])
    print('%s: (mean, std): (%f, %f) time: (mean, std): (%f, %f)' % summary)
# Benchmark configuration shared by every experiment below.
faces = datasets.fetch_olivetti_faces()
patch_size = (20, 20)
true_k = 100   # number of clusters, read by getKmeansFitter
iters = 100    # independent runs per reallocation strategy


def _collect_runs(kmeans_name):
    """Run ``iters`` fresh fits for one strategy; return the results as an array.

    Each row is the tuple returned by ``calculate_image_inertia``.
    """
    runs = []
    for run in range(iters):
        fitter = getKmeansFitter(kmeans_name)
        runs.append(calculate_image_inertia(faces, fitter, patch_size, run))
    return np.array(runs)


# Each strategy is benchmarked and reported before the next one starts.
km_unique_labels_inertia = _collect_runs('unique_labels')
print_iter_details(km_unique_labels_inertia, "km_unique_labels")

km_nonunique_labels_inertia = _collect_runs('nonunique_labels')
print_iter_details(km_nonunique_labels_inertia, "km_nonunique_labels")

km_unique_uniform_labels_inertia = _collect_runs('unique_uniform_labels')
print_iter_details(km_unique_uniform_labels_inertia,
                   "km_unique_uniform_labels")

km_nonunique_uniform_labels_inertia = _collect_runs('nonunique_uniform_labels')
print_iter_details(km_nonunique_uniform_labels_inertia,
                   "km_nonunique_uniform_labels")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment