Created
December 4, 2017 20:04
-
-
Save duarteocarmo/81b755294bdfee9480871db6a9236d4f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import seaborn as sns
from matplotlib.pyplot import figure, show, savefig, close
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram

from Project_Clean_data import raw, header, standardize_this
from toolbox_02450 import clusterplot
# Hierarchical (agglomerative) clustering of the cleaned data set.
#
# Steps: hold out one column as the reference label, build a linkage over the
# remaining columns, cut it into a fixed number of flat clusters, then save a
# cluster plot and a truncated dendrogram as PNG files before showing both.

# Select the attribute to hold out as the reference label.
target_attribute_name = 'Dx'
target_index = list(header).index(target_attribute_name)

# Prepare data: y is the held-out column, X is every remaining column.
# np.delete returns a new array, so `raw` itself is left unmodified.
y = raw[:, target_index]
X = np.delete(raw, target_index, 1)
attributeNames = np.delete(header, target_index)
class_names = ['Positive', 'Negative']
N, M = X.shape
C = len(class_names)
# NOTE(review): standardize_this is defined in Project_Clean_data; presumably
# it rescales every column to a comparable range -- confirm against that module.
X = standardize_this(X)

# Perform hierarchical/agglomerative clustering on the data matrix.
# SciPy's Ward linkage is only defined for the Euclidean metric.
Method = 'ward'
Metric = 'euclidean'
Z = linkage(X, method=Method, metric=Metric)

# Compute flat clusters by cutting the dendrogram so that at most Maxclust
# clusters remain.
Maxclust = 2
cls = fcluster(Z, t=Maxclust, criterion='maxclust')

# Apply seaborn's default plot styling to the figures created below.
sns.set()

# Figure 1: the cluster assignment, with y passed along as the reference label.
figure(1, figsize=(8, 5))
# clusterplot is given the cluster ids as a column vector (one row per sample).
clusterplot(X, cls.reshape(-1, 1), y=y)
savefig("HCluster.png", dpi=600)

# Figure 2: dendrogram, truncated to the top max_display_levels levels.
max_display_levels = 8
figure(2, figsize=(8, 5))
dendrogram(Z, truncate_mode='level', p=max_display_levels)
savefig("Dendrogram.png", dpi=600)

show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment