Last active
June 5, 2020 18:25
-
-
Save Berkodev/a629dfe3b4f112c5f866eb01f916027e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from numpy import genfromtxt
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

## Data preprocessing
# Load csv dataset into a numpy array
# NOTE(review): genfromtxt turns fields it cannot parse as numbers into NaN;
# if the file has a header row, pass skip_header=1 -- confirm against the data.
X = genfromtxt('/path/to/dataset', delimiter=',')

# Use MinMaxScaler to scale the data, so that no single feature dominates
# the distance computations performed by k-Means
sc = MinMaxScaler()
X = sc.fit_transform(X)

## Validation
# Candidate numbers of clusters; shared by the fitting loop and the plot
# so the x axis always matches the recorded inertias.
k_values = range(2, 15)
squared_distances = []
for k in k_values:
    # Initialize and fit k-Means to our dataset
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X)
    # Keep the inertia of current model
    squared_distances.append(kmeans.inertia_)

# Use matplotlib to plot inertia against k
fig = plt.figure(figsize=(15, 5), edgecolor='red')
plt.plot(k_values, squared_distances)
plt.title('Elbow curve')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.show()

## Training
## Although we've trained models for values in k 2 through 14 for validation,
## we didn't save them due to memory concerns. Thus:
# Fit a 5-means model to our dataset.
kmeans = KMeans(n_clusters=5, random_state=42)
# pred holds the cluster index assigned to each row of X
pred = kmeans.fit_predict(X)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment