Last active
August 29, 2015 14:08
-
-
Save hurutoriya/a8f65631d88c76bc5213 to your computer and use it in GitHub Desktop.
Basic Spectral Clustering Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/Cellar/python/2.7.6/bin/python | |
# -*- coding: utf-8 -*- | |
import sys | |
import scipy.misc, scipy.io, scipy.optimize, scipy.cluster.vq, scipy.spatial | |
from numpy import * | |
#import matplotlib as mpl | |
from matplotlib import pyplot, cm, colors, lines, animation | |
import matplotlib.pyplot as plt | |
from mpl_toolkits.mplot3d import Axes3D | |
from sklearn import svm, datasets, metrics | |
from pylab import imread, imshow, gray | |
def gaussianDistance(X, sigma=1.0):
    """Build a Gaussian (RBF) affinity matrix from the rows of X.

    adjacency[i, j] = exp(-||X[i] - X[j]||^2 / (2 * sigma**2)) for i != j,
    with zeros on the diagonal (a vertex is not its own neighbour).
    Connectivity could equally be measured with plain Euclidean distance
    or nearest neighbours; the Gaussian kernel is used here.

    Parameters
    ----------
    X : array-like, shape (m, d)
        Data points, one sample per row.
    sigma : float
        Kernel bandwidth controlling how fast affinity decays with distance.

    Returns
    -------
    ndarray, shape (m, m)
        Symmetric affinity matrix with a zero diagonal.
    """
    m = len(X)
    # Robustness: an empty dataset yields an empty affinity matrix.
    if m == 0:
        return zeros((0, 0))
    # Flatten each sample to a vector so any per-sample shape is supported.
    pts = asarray(X).reshape(m, -1)
    # Vectorized pairwise squared Euclidean distances via broadcasting —
    # replaces the original O(m^2) Python double loop with one numpy pass.
    diff = pts[:, newaxis, :] - pts[newaxis, :, :]
    sq_dist = (diff ** 2).sum(axis=-1)
    # exp(0) = 1 on the diagonal, so subtract the identity to zero it out.
    return exp(-sq_dist / (2.0 * sigma ** 2)) - identity(m)
def degreeMatrix(adjacency):
    """Return the degree of every vertex: the row sums of the adjacency matrix."""
    # Each vertex's degree is the total affinity it carries to all others.
    return sum(adjacency, axis=1)
def main():
    """Cluster a toy dataset with plain K-means and with spectral clustering.

    Generates a concentric-circles dataset (where K-means on raw coordinates
    fails), clusters it both directly and via the eigenvectors of the
    normalized graph Laplacian, and shows the two results side by side.
    """
    # Number of samples in the generated dataset.
    no_of_samples = 100

    # Datasets to cluster.
    data = []
    # Moon-shaped dataset (disabled).
    #data.append(datasets.make_moons(n_samples=no_of_samples, noise=0.05)[0])
    # Concentric-circles dataset.
    data.append(datasets.make_circles(n_samples=no_of_samples, factor=0.5, noise=0.05)[0])

    # Desired number of clusters.
    K = 2

    for X in data:
        # Affinity matrix, degree vector (how strongly each vertex is
        # connected), and the unnormalized Laplacian L = D - A.
        adjacency = gaussianDistance(X, sigma=0.1)
        degree = degreeMatrix(adjacency)
        L = diag(degree) - adjacency
        # Symmetric normalization: L_sym = D^{-1/2} L D^{-1/2}.
        deg_05 = diag(degree ** -0.5)
        L = deg_05.dot(L).dot(deg_05)

        # BUG FIX: use eigh, not eig. L is symmetric; eig may return
        # eigenpairs in arbitrary order with spurious complex parts, while
        # eigh guarantees real eigenvalues sorted ascending — so the manual
        # argsort/sort of the original is no longer needed.
        eigenvalues, eigenvectors = linalg.eigh(L)
        # The eigenvectors of the K smallest eigenvalues encode the
        # connectivity (cluster structure) of the dataset.
        eigenvectors = eigenvectors[:, 0:K]
        print(eigenvalues[0:K])

        color_array = ['w', 'r', 'g', 'y']
        markers = ['o', 'v']
        f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

        # Plain K-means on the raw coordinates.
        centroids, labels = scipy.cluster.vq.kmeans2(X, K)
        # NOTE: use a fresh name instead of rebinding `data`, which is the
        # list currently being iterated (works, but is a shadowing hazard).
        labeled = c_[X, labels]
        ax1.set_title('K-means clustering')
        ax1.set_ylabel('y')
        ax1.set_xlabel('x')
        for k in range(0, K):
            print(k)
            ax1.scatter(labeled[labeled[:, 2] == k, 0],
                        labeled[labeled[:, 2] == k, 1],
                        c=color_array[k], marker=markers[k])

        # Spectral clustering: run K-means on the Laplacian eigenvectors.
        centroids, labels = scipy.cluster.vq.kmeans2(eigenvectors, K)
        labeled = c_[X, labels]
        ax2.set_title('Spectral clustering')
        ax2.set_ylabel('y')
        ax2.set_xlabel('x')
        for k in range(0, K):
            ax2.scatter(labeled[labeled[:, 2] == k, 0],
                        labeled[labeled[:, 2] == k, 1],
                        c=color_array[k], marker=markers[k])

        pyplot.show()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment