# Berkodev/lol_kmeans_positions_full.py

## lol_kmeans_positions_full.py
from numpy import genfromtxt
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

## Data preprocessing
# Load the CSV dataset into a numpy array.
# NOTE: '/path/to/dataset' is a placeholder; point it at the real CSV file.
# NOTE(review): genfromtxt yields NaN for fields it cannot parse as floats
# (e.g. a header row) -- confirm the file is purely numeric.
X = genfromtxt('/path/to/dataset', delimiter=',')

# Rescale every column with MinMaxScaler so that no single feature dominates
# the distances k-means is built on.
sc = MinMaxScaler()
X = sc.fit_transform(X)

## Validation
# Candidate cluster counts. Shared by the fitting loop and the plot's x-axis
# so the two can never drift apart.
k_values = range(2, 15)

squared_distances = []
for k in k_values:
    # Initialize and fit k-means with the current k to our dataset
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X)
    # Keep the inertia (sum of squared distances of samples to their closest
    # cluster center) of the current model
    squared_distances.append(kmeans.inertia_)

# Use matplotlib to plot inertia against k
fig = plt.figure(figsize=(15, 5), edgecolor='red')
plt.plot(k_values, squared_distances)
plt.title('Elbow curve')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.show()

## Training
## Although we've trained models for values in k 2 through 14 for validation,
## we didn't save them due to memory concerns. Thus:
# Fit a 5-means model to our dataset and keep the cluster label of each row.
# NOTE(review): k=5 is presumably the elbow read off the plot above -- confirm.
kmeans = KMeans(n_clusters=5, random_state=42)
pred = kmeans.fit_predict(X)