Skip to content

Instantly share code, notes, and snippets.

@davidandrzej
Created May 3, 2011 17:57
Show Gist options
  • Save davidandrzej/953840 to your computer and use it in GitHub Desktop.
Save davidandrzej/953840 to your computer and use it in GitHub Desktop.
Example usage of blockviz.py
"""
Example blockviz usage with synthetic generated data
Requires scaledimage.py and blockviz.py
David Andrzejewski
"""
import numpy as NP
import numpy.random as NPR
import matplotlib.pyplot as P
import scipy.spatial.distance as DIST
import matplotlib.ticker as MT
import blockviz as BV
import scaledimage as SI
#
# Generate synthetic test dataset
#
(nclust, nfeat, ndata) = (3, 5, 20)
# Generate cluster means
clustmeans = NPR.standard_normal((nfeat, nclust))
# Sample cluster assignments for each data point
dataclust = NPR.randint(0, nclust, (ndata,))
# Generate data points
data = NP.zeros((nfeat, ndata))
covar = NP.identity(nfeat) # diagonal covariance
for clust in range(nclust):
clustidx = NP.where(dataclust == clust)[0]
datapoints = NPR.multivariate_normal(clustmeans[:,clust],
covar,
(len(clustidx),)).T
data[:,clustidx] = datapoints
# Calculate affinity as logistic transform of negative cosine distance
cosdist = DIST.squareform(DIST.pdist(data.T, metric='cosine'))
vecLog = NP.vectorize(BV.logistic)
affinity = vecLog(-1 * cosdist)
# Do non-block visualization of similarity
P.subplot(1,2,1)
SI.scaledimage(affinity, pixwidth=3, ax=P.gca())
P.title('Raw affinity matrix')
# Do block visualization
P.subplot(1,2,2)
BV.blockviz(affinity,
nclust + 1, # why + 1 ? see below
ax=P.gca())
# Draw nice ticklabels
labels = [str(i) for i in range(ndata)]
P.gca().yaxis.set_major_locator(MT.IndexLocator(3,1))
P.gca().yaxis.set_ticklabels(list(reversed(labels)))
P.gca().xaxis.set_major_locator(MT.IndexLocator(3,1))
P.gca().xaxis.set_ticklabels(labels, rotation=90)
#
# On this synth data it seems that SpectralClustering
# cannot resist making exactly one singleton cluster...?
#
# So to get 'right' cluster picture, we tell it
# we want the true number of clusters + 1
#
P.title('After spectral clustering')
P.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment