Last active
June 12, 2019 13:11
-
-
Save spikar/798b735c99abe7e66e5869efde85eb4e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import datasets, cluster

# Load dataset: keep only the first 10 rows of the iris feature matrix.
X = datasets.load_iris().data[:10]

# Specify the parameters for clustering.
# 'ward' linkage is the default, but 'complete' and 'average' can be used too.
clust = cluster.AgglomerativeClustering(n_clusters=3, linkage='ward')

# Fit the model on X and get one cluster index per row of X.
labels = clust.fit_predict(X)

# `labels` now contains an array showing which cluster each point belongs to,
# e.g. (illustrative output only):
# [1, 0, 0, 1, 2, 0, 1, 0, 0]
# Hierarchical clustering using complete linkage.
# The class is reached through the `cluster` module imported at the top of the
# file; the bare name `AgglomerativeClustering` is never imported.
complete = cluster.AgglomerativeClustering(n_clusters=3, linkage='complete')

# Fit the model on X and predict the cluster label of every row.
complete_pred = complete.fit_predict(X)

# Hierarchical clustering using average linkage.
avg = cluster.AgglomerativeClustering(n_clusters=3, linkage='average')

# Fit the model on X and predict the cluster label of every row.
avg_pred = avg.fit_predict(X)
# Cluster validation
from sklearn.metrics import adjusted_rand_score

# adjusted_rand_score compares two 1-D label arrays (ground truth, prediction),
# so the reference must be the iris class labels, not the feature matrix X.
# The targets are sliced to len(X) so they line up with the rows kept in X.
# NOTE(review): if the retained rows all share one true class, the score is
# not informative -- consider clustering the full dataset; confirm intent.
true_labels = datasets.load_iris().target[:len(X)]

# Adjusted Rand score for the ward linkage labels (stored in `labels`).
ward_ar_score = adjusted_rand_score(true_labels, labels)

# Adjusted Rand score for the complete linkage clustering labels.
complete_ar_score = adjusted_rand_score(true_labels, complete_pred)

# Adjusted Rand score for the average linkage clustering labels.
avg_ar_score = adjusted_rand_score(true_labels, avg_pred)

print("Scores: \nWard:", ward_ar_score, "\nComplete: ", complete_ar_score, "\nAverage: ", avg_ar_score)
# Using the scipy library.
# The module is spelled `hierarchy` and the plotting function `dendrogram`.
from scipy.cluster.hierarchy import dendrogram, ward, single
import matplotlib.pyplot as plt

# Perform clustering: build the Ward linkage matrix from the observations in X.
linkage_matrix = ward(X)

# Plot the dendrogram for that linkage matrix and display the figure.
dendrogram(linkage_matrix)
plt.show()
# NOTE(review): `dataset_1` and `helper` are not defined anywhere in this
# file; presumably they come from the surrounding exercise notebook -- confirm
# they are in scope before running this section.

# Create a DBSCAN instance with the library's default parameters.
dbscan = cluster.DBSCAN()

# Use DBSCAN's fit_predict to return clustering labels for dataset_1.
clustering_labels_1 = dbscan.fit_predict(dataset_1)

# Plot clustering
helper.plot_clustered_dataset(dataset_1, clustering_labels_1)

# Plot clustering with neighborhoods
helper.plot_clustered_dataset(dataset_1, clustering_labels_1, neighborhood=True)

# Increase the value of epsilon to allow DBSCAN to find three clusters in the
# dataset.
epsilon = 1.8

# Cluster again with the larger neighborhood radius.
dbscan = cluster.DBSCAN(eps=epsilon)
clustering_labels_2 = dbscan.fit_predict(dataset_1)

# Plot
helper.plot_clustered_dataset(dataset_1, clustering_labels_2, neighborhood=True, epsilon=epsilon)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment