Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Multidimensional k-means cluster finder in Python.
import math
import random
class ClusterCenters(object):
    """Find ``k`` cluster centers in multidimensional data using k-means.

    The whole search runs inside the constructor. When it finishes:

    * ``self.guesses`` holds the ``k`` centers (a list of lists of floats);
    * ``self.data`` holds one ``{"ki": cluster_index, "point": point}`` dict
      per input point, in input order;
    * the centers are printed, unless ``verbose`` is false.

    >> d = ((0,0,0), (0,1,0), (0, 2, 0), (1,2,0), (55, 55,50), (50,50,50), (-100,-100,-100), (-80,-80,-100), (60,60,50))
    >> cc = ClusterCenters(d, 3)
    [[55.0, 55.0, 50.0], [0.25, 1.25, 0.0], [-90.0, -90.0, -100.0]]

    Note that the starting centers are random, so the order of the centers
    (and, for ambiguous data, the solution itself) can differ between runs.
    That is why the example above is not a doctest.
    """

    # The search stops once the point-to-cluster assignment has been found
    # unchanged more than this many times (the counter is cumulative, it is
    # never reset when an assignment changes).
    REQUIRED_STABLE_PASSES = 10

    def __init__(self, data, k, verbose=True):
        """Run k-means on ``data`` and store the resulting centers.

        data is an iterable of equal-length numeric tuples (it is copied into
        a list, so one-shot iterators are accepted).
        k is the number of clusters to look for.
        verbose controls whether the final centers are printed.

        Raises ValueError if ``data`` is empty or ``k`` is smaller than 1.
        """
        points = list(data)
        if not points:
            raise ValueError("data must contain at least one point")
        if k < 1:
            raise ValueError("k must be at least 1")

        self.found = 0
        self.k = k
        self.verbose = verbose
        self.data = points
        dim = self.dim = len(points[0])

        # Per-dimension bounding box of the input; computed once because the
        # points themselves never change, only their cluster labels do.
        self._lows = [min(p[i] for p in points) for i in range(dim)]
        self._highs = [max(p[i] for p in points) for i in range(dim)]

        self.guesses = [self._random_center() for _ in range(k)]
        self.initial_convert_and_sort()

    def _random_center(self):
        """Return a random point inside the bounding box of the input data."""
        return [
            random.triangular(low, high)
            for low, high in zip(self._lows, self._highs)
        ]

    def initial_convert_and_sort(self):
        """Label the raw points with their nearest center, then iterate."""
        self.data = self.get_data(self.data)
        self.unpack_new_centers()

    def get_data(self, points):
        """Return ``{"ki": index, "point": point}`` for every point.

        ``ki`` is the index in ``self.guesses`` of the center with the
        smallest Euclidean distance to the point; ties go to the lowest index.
        """
        new_data = []
        for point in points:
            distances = [
                math.sqrt(
                    sum((point[i] - guess[i]) ** 2 for i in range(self.dim))
                )
                for guess in self.guesses
            ]
            new_data.append({
                "ki": distances.index(min(distances)),
                "point": point,
            })
        return new_data

    def unpack_new_centers(self):
        """Alternate center updates and reassignment until the search ends.

        Implemented as a loop rather than mutual recursion with
        ``correspond_nearest`` so that slow convergence cannot exhaust the
        interpreter's recursion limit.
        """
        while True:
            self._recompute_centers()
            if self._reassign():
                return

    def correspond_nearest(self):
        """Reassign points to centers and keep iterating if not finished."""
        if not self._reassign():
            self.unpack_new_centers()

    def _recompute_centers(self):
        """Move every center to the mean of the points assigned to it.

        A center with no points is re-seeded at a random position inside the
        data's bounding box so it gets another chance to attract points.
        """
        # Group the points by cluster in a single pass over the data.
        members = [[] for _ in self.guesses]
        for entry in self.data:
            members[entry["ki"]].append(entry["point"])

        for ki, points in enumerate(members):
            if points:
                # float() forces true division, so integer input is not
                # floor-divided on Python 2.
                count = float(len(points))
                self.guesses[ki] = [
                    sum(p[i] for p in points) / count
                    for i in range(self.dim)
                ]
            else:
                self.guesses[ki] = self._random_center()

    def _reassign(self):
        """Relabel all points; return True once the search is finished.

        An unchanged assignment increments ``self.found``; the search is
        finished when that counter exceeds ``REQUIRED_STABLE_PASSES``.
        """
        new_data = self.get_data([entry["point"] for entry in self.data])
        if new_data == self.data:
            self.found += 1
            if self.found > self.REQUIRED_STABLE_PASSES:
                if self.verbose:
                    print(self.guesses)
                return True
        else:
            self.data = new_data
        return False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.