Skip to content

Instantly share code, notes, and snippets.

@hurutoriya
Last active August 29, 2015 14:08
Show Gist options
  • Save hurutoriya/a8f65631d88c76bc5213 to your computer and use it in GitHub Desktop.
Save hurutoriya/a8f65631d88c76bc5213 to your computer and use it in GitHub Desktop.
Basic Spectral Clustering Code
#!/usr/local/Cellar/python/2.7.6/bin/python
# -*- coding: utf-8 -*-
import sys
import scipy.misc, scipy.io, scipy.optimize, scipy.cluster.vq, scipy.spatial
from numpy import *
#import matplotlib as mpl
from matplotlib import pyplot, cm, colors, lines, animation
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm, datasets, metrics
from pylab import imread, imshow, gray
def gaussianDistance(X, sigma=1.0):
    """Build a Gaussian (RBF) similarity/adjacency matrix for the rows of X.

    Entry (i, j) is exp(-||x_i - x_j||^2 / (2 * sigma^2)) for i != j, and
    0 on the diagonal (a node is not adjacent to itself).

    Parameters
    ----------
    X : (m, d) array of m data points.
    sigma : bandwidth of the Gaussian kernel.

    Returns
    -------
    (m, m) symmetric adjacency matrix with zero diagonal.
    """
    m = shape(X)[0]
    # Pairwise squared Euclidean distances in one vectorized C-level call,
    # replacing the original O(m^2) Python double loop (which also iterated
    # over the entire lower triangle just to `continue` past it).
    sq_dists = scipy.spatial.distance.cdist(X, X, 'sqeuclidean')
    # exp(0) on the diagonal gives 1, so subtract the identity to zero it
    # out — exactly what the loop-based version produced.
    return exp(-sq_dists / (2 * sigma ** 2)) - identity(m)
def degreeMatrix(adjacency):
    """Return the degree vector of the graph: for each node, the sum of its
    row in the adjacency matrix (the node's total connection weight)."""
    # numpy's free-function sum with axis=1 is equivalent to the
    # ndarray.sum(axis=1) method call used elsewhere.
    return sum(adjacency, axis=1)
def main():
    """Generate a synthetic 2-D dataset and compare plain K-means clustering
    against spectral clustering, plotting the two results side by side."""
    # number of samples
    no_of_samples = 100
    # generate the dataset(s)
    data = []
    # moon-shaped dataset (disabled)
    #data.append(datasets.make_moons(n_samples=no_of_samples, noise=0.05)[0])
    # concentric-circles dataset
    data.append(datasets.make_circles(n_samples=no_of_samples, factor=0.5, noise=0.05)[0])
    # expected number of clusters
    K = 2
    for X in data:
        # From the dataset build the adjacency matrix, the degree matrix
        # (how strongly each vertex is connected) and the Laplacian matrix.
        adjacency = gaussianDistance(X, sigma=0.1)
        degree = degreeMatrix(adjacency)
        L = diag(degree) - adjacency
        # normalized Laplacian matrix: D^{-1/2} L D^{-1/2}
        # NOTE(review): degree ** -0.5 divides by zero for an isolated node
        # (degree 0) — confirm inputs always yield positive degrees.
        deg_05 = diag(degree ** -0.5)
        L = deg_05.dot(L).dot(deg_05)
        # Eigenvalues / eigenvectors of the Laplacian.
        # NOTE(review): linalg.eig may return complex values due to round-off;
        # linalg.eigh would suit this symmetric matrix — confirm.
        eigenvalues, eigenvectors = linalg.eig(L)
        # Sort eigenvalues ascending; the eigenvectors belonging to the K
        # smallest (near-zero) eigenvalues encode the dataset's connectivity.
        idx = eigenvalues.argsort()
        eigenvalues.sort()
        evecs = eigenvectors[:, idx]
        eigenvectors = evecs[:, 0:K]
        print eigenvalues[0:K]
        color_array = ['w', 'r', 'g', 'y']
        makers = ['o', 'v']  # marker shapes per cluster (sic: "makers")
        f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
        # Left panel: K-means applied directly to the raw coordinates.
        centroids, labels = scipy.cluster.vq.kmeans2(X, K)
        # NOTE(review): rebinds the loop source `data` to an (m, 3) array of
        # [x, y, label] — harmless with one dataset, but fragile.
        data = c_[X, labels]
        ax1.set_title('K-means clustering')
        ax1.set_ylabel('y')
        ax1.set_xlabel('x')
        for k in range(0, K):
            print k
            ax1.scatter(data[data[:, 2] == k, 0], data[data[:, 2] == k, 1], c=color_array[k], marker=makers[k])
        # Right panel: spectral clustering — i.e. K-means run on the K
        # smallest eigenvectors instead of the raw coordinates.
        centroids, labels = scipy.cluster.vq.kmeans2(eigenvectors, K)
        data = c_[X, labels]
        ax2.set_title('Spectral clustering')
        ax2.set_ylabel('y')
        ax2.set_xlabel('x')
        for k in range(0, K):
            ax2.scatter(data[data[:, 2] == k, 0], data[data[:, 2] == k, 1], c=color_array[k], marker=makers[k])
        # Plot the eigenvectors themselves (disabled).
        '''
        data = c_[eigenvectors, labels]
        ax = fig.add_subplot(133)
        ax.set_title('K-eigenvectors')
        ax.set_ylabel('Second Elements')
        ax.set_xlabel('First Elements')
        print eigenvectors
        for k in range(0, K):
            ax.scatter(data[data[:, 2] == k, 0], data[data[:, 2] == k, 1], c=color_array[k], marker=makers[k])
        '''
    pyplot.show()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment