Skip to content

Instantly share code, notes, and snippets.

@duarteocarmo
Created December 4, 2017 20:04
Show Gist options
  • Save duarteocarmo/81b755294bdfee9480871db6a9236d4f to your computer and use it in GitHub Desktop.
Save duarteocarmo/81b755294bdfee9480871db6a9236d4f to your computer and use it in GitHub Desktop.
from matplotlib.pyplot import figure, show, savefig, close
from toolbox_02450 import clusterplot
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram
from Project_Clean_data import raw, header, standardize_this
import numpy as np
import seaborn as sns
# Attribute to predict, and its column position within the header.
target_attribute_name = 'Dx'
target_index = list(header).index(target_attribute_name)

# Split the raw matrix: the target column becomes y, every other column
# stays in X. The target's name is dropped from the header the same way so
# attributeNames lines up with the columns of X.
y = raw[:, target_index]
X = np.delete(raw, target_index, axis=1)
attributeNames = np.delete(header, target_index)

# Class labels and problem dimensions (rows N, attributes M, classes C).
class_names = ['Positive', 'Negative']
C = len(class_names)
N, M = X.shape

# standardize_this comes from Project_Clean_data; it is applied to all
# attribute columns at once (presumably a per-column standardization --
# confirm against that module).
X = standardize_this(X)
# Hierarchical/agglomerative clustering of the data matrix: Ward linkage
# on Euclidean distances. Z is the resulting linkage matrix.
Method, Metric = 'ward', 'euclidean'
Z = linkage(X, metric=Metric, method=Method)

# Threshold the hierarchy so that no more than Maxclust flat clusters form;
# cls holds one cluster id per row of X.
Maxclust = 2
cls = fcluster(Z, t=Maxclust, criterion='maxclust')

# Draw the cluster assignment together with the true labels y and save it.
# The cluster ids are passed to clusterplot as a single-column 2-D array.
sns.set()
figure(1, figsize=(8, 5))
clusterplot(X, cls.reshape(-1, 1), y=y)
savefig("HCluster.png", dpi=600)
# Dendrogram of the linkage matrix Z, truncated by level: at most
# max_display_levels levels of the tree are drawn.
max_display_levels = 8
figure(2, figsize=(8, 5))
dendrogram(Z, p=max_display_levels, truncate_mode='level')
savefig("Dendrogram.png", dpi=600)

# Save first, then open the interactive windows for both figures.
show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment