Skip to content

Instantly share code, notes, and snippets.

@spikar
Last active June 12, 2019 13:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save spikar/798b735c99abe7e66e5869efde85eb4e to your computer and use it in GitHub Desktop.
Save spikar/798b735c99abe7e66e5869efde85eb4e to your computer and use it in GitHub Desktop.
from sklearn import datasets, cluster
# Load the dataset: only the first 10 rows of the iris feature matrix are used.
X = datasets.load_iris().data[:10]
# Configure the clustering.
# 'ward' linkage is the default, but 'complete' and 'average' can be used too.
# The estimator class is spelled AgglomerativeClustering; sklearn.cluster has
# no attribute named "AgglomerateClustering".
clust = cluster.AgglomerativeClustering(n_clusters=3, linkage='ward')
labels = clust.fit_predict(X)
# labels now holds one cluster index per row of X (values 0, 1 or 2, since
# n_clusters=3), telling which cluster each point belongs to.
# Compare Ward, complete and average linkage on X and score each result
# against the true iris species labels with the adjusted Rand index.

# Hierarchical clustering using Ward linkage.
# ward_pred is computed here because the scoring below needs it.
ward_pred = cluster.AgglomerativeClustering(n_clusters=3, linkage='ward').fit_predict(X)
# Hierarchical clustering using complete linkage.
# The estimator is reached through the imported `cluster` module; the bare
# name AgglomerativeClustering is never imported in this file.
complete = cluster.AgglomerativeClustering(n_clusters=3, linkage='complete')
# Fit the model on the dataset and predict the cluster labels
complete_pred = complete.fit_predict(X)
# Hierarchical clustering using average linkage
avg = cluster.AgglomerativeClustering(n_clusters=3, linkage='average')
# Fit the model on the dataset and predict the cluster labels
avg_pred = avg.fit_predict(X)
# Cluster validation
from sklearn.metrics import adjusted_rand_score
# adjusted_rand_score(labels_true, labels_pred) compares two label
# assignments, so its first argument must be the ground-truth class labels,
# not the feature matrix X. X holds the leading rows of the iris data, so the
# targets are sliced to the same number of rows.
# NOTE(review): if X is only a short leading slice of iris, every true label
# is the same species and the scores are not informative -- consider loading
# the full dataset for a meaningful comparison.
true_labels = datasets.load_iris().target[:len(X)]
ward_ar_score = adjusted_rand_score(true_labels, ward_pred)
# Adjusted Rand score for the complete linkage clustering labels
complete_ar_score = adjusted_rand_score(true_labels, complete_pred)
# Adjusted Rand score for the average linkage clustering labels
avg_ar_score = adjusted_rand_score(true_labels, avg_pred)
print("Scores: \nWard:", ward_ar_score, "\nComplete: ", complete_ar_score, "\nAverage: ", avg_ar_score)
# Using the SciPy library to build and draw a dendrogram for X.
# The module is scipy.cluster.hierarchy and the plotting function is
# dendrogram (the earlier spellings "heirarchy"/"dendogram" do not exist).
from scipy.cluster.hierarchy import dendrogram, ward, single
import matplotlib.pyplot as plt
# Perform clustering: ward() returns the linkage matrix for the rows of X
linkage_matrix = ward(X)
# Plot the dendrogram described by the linkage matrix
dendrogram(linkage_matrix)
plt.show()
# NOTE(review): dataset_1 and helper are not defined in this snippet; they are
# presumably provided by the surrounding notebook -- confirm before running.

# First pass: DBSCAN constructed with no arguments (library defaults).
dbscan = cluster.DBSCAN()
# Cluster dataset_1 and keep the label assigned to each point.
clustering_labels_1 = dbscan.fit_predict(dataset_1)
# Plot the clustering, first plain and then with neighborhoods drawn.
helper.plot_clustered_dataset(dataset_1, clustering_labels_1)
helper.plot_clustered_dataset(
    dataset_1,
    clustering_labels_1,
    neighborhood=True,
)
# Second pass: a larger epsilon, intended to let DBSCAN find three clusters
# in the dataset.
epsilon = 1.8
# Re-cluster with the new eps value.
dbscan = cluster.DBSCAN(eps=epsilon)
clustering_labels_2 = dbscan.fit_predict(dataset_1)
# Plot again, passing epsilon through to the plotting helper.
helper.plot_clustered_dataset(
    dataset_1,
    clustering_labels_2,
    neighborhood=True,
    epsilon=epsilon,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment