Skip to content

Instantly share code, notes, and snippets.

@fmarthoz
Last active December 16, 2022 09:16
Show Gist options
  • Save fmarthoz/8d4c55cb2fe9074c62876125628090c0 to your computer and use it in GitHub Desktop.
Save fmarthoz/8d4c55cb2fe9074c62876125628090c0 to your computer and use it in GitHub Desktop.
# Greedy forward feature selection for KMeans, scored by silhouette, followed
# by a 3-D scatter plot of the clusters found with the best k.
#
# Names expected to be in scope already (none of them is defined here):
#   df   - features to cluster; assumed to be a pandas DataFrame (TODO confirm)
#   df_  - frame used for plotting; presumably the unscaled copy of df
#          (TODO confirm against the code that precedes this cell)
#   StandardScaler, KMeans, silhouette_score, np, plt
import pandas as pd  # harmless if already imported; needed to rebuild the frame

scaler = StandardScaler()
# fit_transform returns a bare ndarray, which has no .columns and cannot be
# indexed by column name, so wrap the scaled values back into a DataFrame.
df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)

kmeans_kwargs = {"init": "random", "n_init": 20, "max_iter": 1000, "random_state": 1984}
cut_off = 0.5  # not used anywhere in this cell
maxvars = 3    # number of variables to select; the 3-D plot below needs exactly 3
kmin = 2
kmax = 8

results_for_each_k = []  # silhouette of the final variable set, one entry per k
vars_for_each_k = {}     # k -> list of selected column names
for k in range(kmin, kmax + 1):
    # Start every k from the full candidate pool. Sharing one pool across all
    # k would hide the variables picked for earlier k from later ones and
    # eventually leave the pool empty.
    cols = list(df.columns)
    selected_variables = []
    best_score = None
    while len(selected_variables) < maxvars and cols:
        # Score each remaining candidate when added to the current selection.
        results = []
        for col in cols:
            scols = selected_variables + [col]
            kmeans = KMeans(n_clusters=k, **kmeans_kwargs)
            labels = kmeans.fit_predict(df[scols])
            results.append(silhouette_score(df[scols], labels))
        # Keep the candidate with the highest silhouette and drop it from the pool.
        best_idx = int(np.argmax(results))
        best_score = results[best_idx]
        selected_variables.append(cols.pop(best_idx))
    results_for_each_k.append(best_score)
    vars_for_each_k[k] = selected_variables

# results_for_each_k[0] corresponds to k == kmin, hence the offset.
best_k = int(np.argmax(results_for_each_k)) + kmin
# you can also force a value for k
# best_k = 3
selected_variables = vars_for_each_k[best_k]
kmeans = KMeans(n_clusters=best_k, **kmeans_kwargs)
clusters = kmeans.fit_predict(df[selected_variables])

# Equivalent of the notebook magic `%matplotlib inline`, written so the cell is
# also valid when run as a plain Python script (where get_ipython is undefined).
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

fig = plt.figure(figsize=(15, 15))
# plt.rcParams['font.size'] = 22
ax = plt.axes(projection="3d")
# Plot variable i on the axis that is labelled with variable i.
x_points = df_[selected_variables[0]]
y_points = df_[selected_variables[1]]
z_points = df_[selected_variables[2]]
f1 = ax.scatter3D(x_points, y_points, z_points, c=clusters, cmap='Accent', s=300)
ax.set_xlabel(selected_variables[0], fontsize=20)
ax.set_ylabel(selected_variables[1], fontsize=20)
ax.set_zlabel(selected_variables[2], fontsize=20)
# One legend entry per cluster colour, instead of passing the per-sample
# label array as legend text.
ax.legend(*f1.legend_elements(), title="Cluster")
plt.title('KMeans used on the Europe Datasets', fontsize=24)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment