# spikar/agglomerate_clustering.py

## agglomerate_clustering.py
from sklearn import datasets, cluster

# Load the dataset: only the first 10 rows of the iris feature matrix are used.
X = datasets.load_iris().data[:10]

# Specify the parameters for clustering.
# 'ward' linkage is the default, but 'complete' and 'average' can be used too.
# The estimator class is spelled AgglomerativeClustering; the previous
# spelling `AgglomerateClustering` is not an attribute of sklearn.cluster.
clust = cluster.AgglomerativeClustering(n_clusters=3, linkage='ward')

# Fit the model and get one cluster id per row of X.
labels = clust.fit_predict(X)

# `labels` now holds 10 integers (one per sample), each identifying which of
# the 3 requested clusters that point belongs to.


# Hierarchical clustering using complete linkage.
# AgglomerativeClustering is accessed through the imported `cluster` module;
# the bare class name is not imported anywhere in the visible file.
complete = cluster.AgglomerativeClustering(n_clusters=3, linkage='complete')
# Fit the model to the dataset and predict one cluster label per sample.
complete_pred = complete.fit_predict(X)

# Hierarchical clustering using average linkage.
avg = cluster.AgglomerativeClustering(n_clusters=3, linkage='average')
# Fit the model to the dataset and predict one cluster label per sample.
avg_pred = avg.fit_predict(X)

# Cluster validation
from sklearn.metrics import adjusted_rand_score

# adjusted_rand_score compares two 1-D label arrays, so the reference must be
# the ground-truth class labels rather than the feature matrix X. Slice the
# targets to the same leading rows that X was taken from.
# NOTE(review): the leading rows of iris appear to share a single species, so
# with only 10 samples these scores are probably not informative - consider
# clustering the full dataset before comparing linkages.
y_true = datasets.load_iris().target[:len(X)]

# The Ward-linkage predictions are the `labels` array computed earlier in this
# script (`ward_pred` was never defined).
ward_ar_score = adjusted_rand_score(y_true, labels)

# Adjusted Rand score for the complete-linkage clustering labels.
complete_ar_score = adjusted_rand_score(y_true, complete_pred)

# Adjusted Rand score for the average-linkage clustering labels.
avg_ar_score = adjusted_rand_score(y_true, avg_pred)

print("Scores: \nWard:", ward_ar_score, "\nComplete: ", complete_ar_score, "\nAverage: ", avg_ar_score)

# Using the scipy library.
# The module is `scipy.cluster.hierarchy` and the plotting function is
# `dendrogram`; the earlier misspellings made this import fail.
from scipy.cluster.hierarchy import dendrogram, ward, single
import matplotlib.pyplot as plt

# Perform clustering: build the Ward linkage matrix for X.
linkage_matrix = ward(X)

# Plot the dendrogram described by the linkage matrix.
dendrogram(linkage_matrix)

plt.show()

# --- DBSCAN, first pass: estimator built with no arguments -----------------
# NOTE(review): `dataset_1` and `helper` are not defined or imported anywhere
# in the visible file - confirm where they are expected to come from.
dbscan = cluster.DBSCAN()
clustering_labels_1 = dbscan.fit_predict(dataset_1)

# Show the clustering, then show it again with neighborhoods drawn.
helper.plot_clustered_dataset(dataset_1, clustering_labels_1)
helper.plot_clustered_dataset(
    dataset_1,
    clustering_labels_1,
    neighborhood=True,
)

# --- DBSCAN, second pass: explicit epsilon ---------------------------------
# Epsilon value chosen with the goal of DBSCAN finding three clusters.
epsilon = 1.8

dbscan = cluster.DBSCAN(eps=epsilon)
clustering_labels_2 = dbscan.fit_predict(dataset_1)

# Plot with neighborhoods, passing the same epsilon to the plotting helper.
helper.plot_clustered_dataset(
    dataset_1,
    clustering_labels_2,
    neighborhood=True,
    epsilon=epsilon,
)
	# ---- Second copy of the clustering walkthrough ----
# NOTE(review): this section repeats the walkthrough earlier in the file (it
# looks like an accidental paste). Every statement carried a stray leading tab,
# which made the module fail to parse with an IndentationError; the statements
# are now at module level. Consider deleting one of the two copies.
from sklearn import datasets, cluster

# Load the dataset: only the first 10 rows of the iris feature matrix are used.
X = datasets.load_iris().data[:10]

# Specify the parameters for clustering.
# 'ward' linkage is the default, but 'complete' and 'average' can be used too.
# The estimator class is spelled AgglomerativeClustering; the previous
# spelling `AgglomerateClustering` is not an attribute of sklearn.cluster.
clust = cluster.AgglomerativeClustering(n_clusters=3, linkage='ward')

# Fit the model and get one cluster id per row of X.
labels = clust.fit_predict(X)

# `labels` now holds 10 integers (one per sample), each identifying which of
# the 3 requested clusters that point belongs to.


# Hierarchical clustering using complete linkage.
# AgglomerativeClustering is accessed through the imported `cluster` module;
# the bare class name is not imported in this section.
complete = cluster.AgglomerativeClustering(n_clusters=3, linkage='complete')
# Fit the model to the dataset and predict one cluster label per sample.
complete_pred = complete.fit_predict(X)

# Hierarchical clustering using average linkage.
avg = cluster.AgglomerativeClustering(n_clusters=3, linkage='average')
# Fit the model to the dataset and predict one cluster label per sample.
avg_pred = avg.fit_predict(X)

# Cluster validation
from sklearn.metrics import adjusted_rand_score

# adjusted_rand_score compares two 1-D label arrays, so the reference must be
# the ground-truth class labels rather than the feature matrix X. Slice the
# targets to the same leading rows that X was taken from.
# NOTE(review): the leading rows of iris appear to share a single species, so
# with only 10 samples these scores are probably not informative - consider
# clustering the full dataset before comparing linkages.
y_true = datasets.load_iris().target[:len(X)]

# The Ward-linkage predictions are the `labels` array computed just above
# (`ward_pred` was never defined).
ward_ar_score = adjusted_rand_score(y_true, labels)

# Adjusted Rand score for the complete-linkage clustering labels.
complete_ar_score = adjusted_rand_score(y_true, complete_pred)

# Adjusted Rand score for the average-linkage clustering labels.
avg_ar_score = adjusted_rand_score(y_true, avg_pred)

print("Scores: \nWard:", ward_ar_score, "\nComplete: ", complete_ar_score, "\nAverage: ", avg_ar_score)

# Using the scipy library.
# The module is `scipy.cluster.hierarchy` and the plotting function is
# `dendrogram`; the earlier misspellings made this import fail.
from scipy.cluster.hierarchy import dendrogram, ward, single
import matplotlib.pyplot as plt

# Perform clustering: build the Ward linkage matrix for X.
linkage_matrix = ward(X)

# Plot the dendrogram described by the linkage matrix.
dendrogram(linkage_matrix)

plt.show()

# DBSCAN with no constructor arguments.
# NOTE(review): `dataset_1` and `helper` are not defined or imported anywhere
# in the visible file - confirm where they are expected to come from.
dbscan = cluster.DBSCAN()
# Use DBSCAN's fit_predict to return clustering labels for dataset_1.
clustering_labels_1 = dbscan.fit_predict(dataset_1)

# Plot clustering
helper.plot_clustered_dataset(dataset_1, clustering_labels_1)

# Plot clustering with neighborhoods
helper.plot_clustered_dataset(dataset_1, clustering_labels_1, neighborhood=True)

# Epsilon value chosen with the goal of DBSCAN finding three clusters.
epsilon = 1.8

# Cluster
dbscan = cluster.DBSCAN(eps=epsilon)
clustering_labels_2 = dbscan.fit_predict(dataset_1)

# Plot
helper.plot_clustered_dataset(dataset_1, clustering_labels_2, neighborhood=True, epsilon=epsilon)