Skip to content

Instantly share code, notes, and snippets.

@kyleabeauchamp
Last active December 17, 2015 10:29
Show Gist options
  • Save kyleabeauchamp/5595013 to your computer and use it in GitHub Desktop.
Save kyleabeauchamp/5595013 to your computer and use it in GitHub Desktop.
Example of Ward's Clustering
import matplotlib.pyplot as plt
import scipy.cluster
import numpy as np
import fastcluster
# Ward clustering of a precomputed similarity matrix read from disk.
#
# squareform is imported from its public home; going through
# `scipy.cluster.hierarchy.distance` relies on an undocumented alias of
# scipy.spatial.distance inside the hierarchy module.
from scipy.spatial.distance import squareform

# Presumably a square, symmetric similarity matrix -- confirm against the
# data file.
similarities = np.loadtxt("./TanimotoMatrix0-1998")

# Turn similarities into distances with -log(s / 1000).
# NOTE(review): the 1000 looks like the scale of the stored scores (0..1000)
# -- confirm. A similarity of 0 would map to an infinite distance here.
distances0 = -np.log(similarities / 1000.)

# Convert between the square and condensed (1-D) distance representations;
# assuming a square input, this yields the condensed vector passed on below.
distances = squareform(distances0)

# Ward linkage via fastcluster (complete linkage from
# scipy.cluster.hierarchy would be a drop-in alternative here).
clustering = fastcluster.ward(distances)
scipy.cluster.hierarchy.dendrogram(clustering)

# Cut the tree so that no more than two flat clusters are formed.
assignments = scipy.cluster.hierarchy.fcluster(clustering, 2, criterion="maxclust")
import matplotlib.pyplot as plt
import scipy.cluster
import numpy as np
# Synthetic demo: draw two Gaussian blobs, separate them with Ward
# clustering, then plot the point density and the recovered assignment.
num_samples = 1000

# Standard-normal points in 2-D, one row per sample.
x = np.random.normal(size=(num_samples, 2))

# Shift the first half of the rows by (3, 3) to create a second group.
# Floor division keeps the slice bound an integer (a float bound raises
# TypeError on Python 3), and the offset is built with np.array because a
# bare `array` name is not provided by this file's imports.
x[:num_samples // 2] += np.array([3, 3])

# Ward linkage computed directly from the raw observations.
clustering = scipy.cluster.hierarchy.ward(x)
scipy.cluster.hierarchy.dendrogram(clustering)

# Cut the tree so that no more than two flat clusters are formed; the plots
# below select the points labelled 1 and 2.
assignments = scipy.cluster.hierarchy.fcluster(clustering, 2, criterion="maxclust")

plt.figure()
plt.title("Density")
plt.hexbin(*x.T)

plt.figure()
plt.title("Two state ward clustering of data")
plt.plot(x[assignments == 1, 0], x[assignments == 1, 1], 'x', label="1")
plt.plot(x[assignments == 2, 0], x[assignments == 2, 1], 'x', label="2")
plt.legend()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment