Skip to content

Instantly share code, notes, and snippets.

@dkn22
Created September 1, 2018 17:17
Show Gist options
  • Save dkn22/a005835cb18321036019c7e4d35bd6ae to your computer and use it in GitHub Desktop.
Save dkn22/a005835cb18321036019c7e4d35bd6ae to your computer and use it in GitHub Desktop.
Max-Spacing K-Clustering Algorithm
class UnionFind:
def __init__(self, ids):
self._id = {i: i for i in ids} # pointer to the leader
self.sizes = {i: 1 for i in ids}
self.n_components = len(set(self._id))
def _root(self, i):
j = i
while (j != self._id[j]):
self._id[j] = self._id[self._id[j]]
j = self._id[j]
return j
def find(self, p):
return self._root(p)
def union(self, p, q):
i = self._root(p)
j = self._root(q)
if (self.sizes[i] < self.sizes[j]):
self._id[i] = j
self.sizes[j] = self.sizes[j] + self.sizes.pop(i)
else:
self._id[j] = i
self.sizes[i] = self.sizes[i] + self.sizes.pop(j)
self.n_components -= 1
def max_spacing_k_clustering(k, graph):
uf = UnionFind(graph.nodes)
edges = sorted(graph.edges, key=lambda e: e.cost)
while uf.n_components > k:
max_edge = edges.pop(0) # pre-sorted
n1, n2, cost = max_edge
if uf.find(n1) != uf.find(n2):
uf.union(n1, n2)
spacings = []
# remaining edge costs will contain spacings
while len(spacings) < uf.n_components:
max_edge = edges.pop(0)
n1, n2, cost = max_edge
if uf.find(n1) != uf.find(n2):
spacings.append(cost)
return spacings[0], uf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment