Skip to content

Instantly share code, notes, and snippets.

@merylldindin
Last active November 25, 2019 05:17
Show Gist options
  • Save merylldindin/a11d44a97738d3bdefa28e59deaa032c to your computer and use it in GitHub Desktop.
Save merylldindin/a11d44a97738d3bdefa28e59deaa032c to your computer and use it in GitHub Desktop.
ToMaTo Clustering
def define_clusters(lst, fil, neighbors):
    """Build a union-find of clusters by sweeping indexes in the order of lst.

    Each index is attached to its already-visited neighbor with the largest
    filtration value; the roots of its other already-visited neighbors are
    then merged into that cluster whenever the smaller of the two filtration
    values is below ``fil[idx] + tau``.

    Args:
        lst: ordered sequence of indexes (the visiting order).
        fil: mapping from index to its filtration value.
        neighbors: number of closest elements to request per query.

    Returns:
        The populated UnionFind instance.

    NOTE(review): ``self`` (providing ``kdt``, the neighboring graph built on
    the values ``x``) and ``tau`` are not parameters; they must be resolved
    from an enclosing scope that is not visible here -- confirm at the
    definition site.
    """
    unf = UnionFind()

    # Position of the first occurrence of every index in the visiting order.
    # Computed once instead of rescanning lst with np.where for every index
    # and every neighbor.
    rank = {}
    for position, value in enumerate(lst):
        rank.setdefault(value, position)

    for idx in lst:
        srt = rank[idx]
        nei = self.kdt.query([self.x[idx]], neighbors, return_distance=False)

        # Keep only neighbors visited strictly before idx. The first entry of
        # the query result is skipped (presumably the query point itself --
        # TODO confirm).
        grp = [ele for ele in nei[0][1:] if rank[ele] < srt]

        if not grp:
            # No already-visited neighbor: idx starts its own cluster.
            unf.insert_objects([idx])
            continue

        # Attach idx to the visited neighbor with the largest filtration value.
        parent = grp[np.asarray([fil[j] for j in grp]).argmax()]
        unf.union(parent, idx)

        for ele in grp:
            root = unf.find(ele)
            mini = min(fil[parent], fil[root])
            if root != parent and mini < fil[idx] + tau:
                unf.union(parent, root)
                # Continue with the representative of the merged cluster.
                parent = unf.find(root)

    return unf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment