Skip to content

Instantly share code, notes, and snippets.

@amueller
Last active August 29, 2015 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amueller/dce0841b9bda4a597335 to your computer and use it in GitHub Desktop.
Save amueller/dce0841b9bda4a597335 to your computer and use it in GitHub Desktop.
Benchmarking the Elkan k-means implementation
from sklearn.cluster import KMeans
from time import time
from sklearn.datasets import load_digits, fetch_mldata, load_iris, fetch_20newsgroups_vectorized
def bench_kmeans(data, n_clusters=5, init='random', n_init=1):
    """Time Lloyd and Elkan k-means on ``data`` and print the results.

    For each of the two algorithms a ``KMeans`` estimator is built with
    ``random_state=0`` and fitted on ``data``; the elapsed wall-clock time
    (estimator construction plus ``fit``) and the resulting ``inertia_`` are
    printed on one line.

    Parameters
    ----------
    data : array-like
        Samples to cluster; passed straight to ``KMeans.fit``.
    n_clusters : int, default 5
        Number of clusters, forwarded to ``KMeans``.
    init : str, default 'random'
        Initialisation method, forwarded to ``KMeans``.
    n_init : int, default 1
        Number of initialisations, forwarded to ``KMeans``.
    """
    # Fit on the ``data`` argument rather than on a global ``X``, so the
    # benchmark measures exactly what the caller passed in.
    start = time()
    km1 = KMeans(algorithm='lloyd', n_clusters=n_clusters, random_state=0,
                 init=init, n_init=n_init).fit(data)
    print("lloyd time: %f inertia: %f" % (time() - start, km1.inertia_))

    start = time()
    km2 = KMeans(algorithm='elkan', n_clusters=n_clusters, random_state=0,
                 init=init, n_init=n_init).fit(data)
    print("elkan time: %3.4f inertia: %f" % (time() - start, km2.inertia_))
# Load the benchmark datasets once, before any timing starts.
# NOTE(review): fetch_mldata appears to have been removed from later
# scikit-learn releases (fetch_openml is the documented replacement) --
# confirm against the scikit-learn version this script targets.
mnist = fetch_mldata("MNIST original")
digits = load_digits()
iris = load_iris()

# (label, data matrix) pairs; "mnist.T" is the transposed MNIST matrix.
benchmarks = [
    ("digits", digits.data),
    ("iris", iris.data),
    ("mnist", mnist.data),
    ("mnist.T", mnist.data.T),
]

# Run the Lloyd/Elkan comparison on every dataset for each cluster count.
for n_clusters in (3, 10, 100):
    print("\nn_clusters = %d" % n_clusters)
    for dataset, X in benchmarks:
        print(dataset)
        bench_kmeans(X, n_clusters=n_clusters, init='random')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment