@gravitino · Created February 28, 2016 17:04

Scalable spectral clustering with an efficient out-of-sample extension that estimates the feature transform.
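The script below builds a dense RBF kernel on a small training sample, computes a spectral embedding from the normalized graph Laplacian, and clusters the embedding with k-means. It then fits one k-nearest-neighbor regressor per embedding coordinate, estimating the feature transform from input space to the embedding, so that new points can be embedded by regression and clustered without recomputing the kernel.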
import numpy as np
import pylab as pl
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs, make_circles
from sklearn.metrics import adjusted_rand_score
from sklearn.neighbors import KNeighborsRegressor

def rbf_kernel(X, beta=None, p=2):
    num_data, num_feat = X.shape
    kernel = np.zeros((num_data, num_data))
    # pairwise distances sum(|x_i - x_j|^p); symmetric, so fill both triangles
    for i in range(num_data):
        for j in range(i, num_data):
            kernel[i, j] = kernel[j, i] = np.sum(abs(X[i]-X[j])**p)
    if beta is None:
        # bandwidth heuristic: a quarter of the spread of all distances
        beta = np.std(kernel)/4
    beta = abs(beta)
    kernel = np.exp(-kernel/beta**p)
    print("typical length in rbf kernel", beta)
    return kernel

def plot_with_labels(X, labels):
    # scatter plot of the 2-d points, one color per cluster label
    colors = {0: "red", 1: "blue", 2: "green", 3: "black", 4: "magenta"}
    k = max(labels)+1
    Xs, Ys = [[] for _ in range(k)], [[] for _ in range(k)]
    for label, (x, y) in zip(labels, X):
        Xs[label].append(x)
        Ys[label].append(y)
    for label, (x, y) in enumerate(zip(Xs, Ys)):
        pl.plot(x, y, "o", c=colors[label % 5])

def spectral_clustering(kernel, k=3, weighted=True):
    if weighted:
        # node degrees of the kernel graph
        weights = np.sum(kernel, axis=0)
    else:
        weights = np.ones(kernel.shape[0])
    # symmetric normalized Laplacian L = I - D^(-1/2) K D^(-1/2)
    D = np.diag(1/np.sqrt(weights))
    L = np.eye(kernel.shape[0])-D.dot(kernel).dot(D)
    # SVD sorts singular values in descending order, so the eigenvectors
    # belonging to the smallest eigenvalues of L are the last columns of U
    U, S, V = np.linalg.svd(L)
    E = U[:, -2-k:-2]
    return KMeans(n_clusters=k).fit_predict(E), E
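
# Note on the slicing above: U[:, -2-k:-2] keeps k eigenvectors while
# discarding the two of smallest eigenvalue; in the weighted case the very
# smallest belongs to the trivial direction D^(1/2)*1 with eigenvalue 0.
# Skipping two columns rather than one appears to be a deliberate choice of
# this gist.
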
n_samples, k = 2000, 2
# two concentric circles; swap in make_blobs for a linearly separable dataset
# X, Y = make_blobs(n_samples=n_samples, n_features=2, centers=2, cluster_std=1.0)
X, Y = make_circles(n_samples=n_samples, factor=.5, noise=.05)

pl.subplot(231)
plot_with_labels(X, Y)         # ground-truth labels

# in-sample spectral clustering on the full kernel
K = rbf_kernel(X)
L, E = spectral_clustering(K, k)
pl.subplot(232)
plot_with_labels(X, L)         # predicted labels
pl.subplot(233)
plot_with_labels(E[:, :2], L)  # spectral embedding
print("rand score insample", adjusted_rand_score(Y, L))

# estimate the feature transform x -> embedding with one regressor per
# embedding coordinate, so new points can be embedded without the kernel
rfr0 = KNeighborsRegressor(n_jobs=-1).fit(X, E[:, 0])
rfr1 = KNeighborsRegressor(n_jobs=-1).fit(X, E[:, 1])
print(rfr0)
print(rfr1)

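# Out-of-sample extension: the points below are embedded by kNN prediction
# against only the n_samples training points; rebuilding the dense kernel at
# this scale (a 20,000,000 x 20,000,000 matrix) would be infeasible.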
XX, YY = make_circles(n_samples=10000*n_samples, factor=.5, noise=.05)
FF = np.vstack((rfr0.predict(XX), rfr1.predict(XX))).T
# k-means on the first embedding coordinate only
LL = KMeans(n_clusters=k).fit_predict(FF[:, :1])
print("finished computation")

pl.subplot(234)
plot_with_labels(XX, YY)  # ground truth
pl.subplot(235)
plot_with_labels(XX, LL)  # out-of-sample clustering
pl.subplot(236)
plot_with_labels(FF, LL)  # estimated embedding
print("rand score outsample", adjusted_rand_score(YY, LL))
pl.show()
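
# For a quick sanity check of the in-sample step, scikit-learn's built-in
# spectral clustering can be run on the same data. A minimal sketch: gamma=1.0
# is an illustrative bandwidth, not the value chosen by rbf_kernel above.
from sklearn.cluster import SpectralClustering
sc = SpectralClustering(n_clusters=k, affinity="rbf", gamma=1.0)
print("rand score sklearn", adjusted_rand_score(Y, sc.fit_predict(X)))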