Skip to content

Instantly share code, notes, and snippets.

Last active October 11, 2016 14:50
Show Gist options
  • Save jayelm/129382ba73ca2fb390c870350bed2d30 to your computer and use it in GitHub Desktop.
Save jayelm/129382ba73ca2fb390c870350bed2d30 to your computer and use it in GitHub Desktop.
Convenient Cluster Counter
Neat wrapper around an autoincrementing defaultdict. Most useful for assigning
unique numbers to unseen examples while clustering, but probably has other uses
as well.
Example usage
In [1]: cc = ClusterCounter()
In [2]: cc['setosa']
Out[2]: 0
In [3]: cc['virginica']
Out[3]: 1
In [4]: cc['versicolor']
Out[4]: 2
In [5]: cc['new_species']
Out[5]: 3
In [6]: cc['virginica']
Out[6]: 1
In [7]: cc.nclus
Out[7]: 4
The following applies to ClusterCounter2 only:
In [8]: 'new_species' in cc
Out[8]: True
In [9]: cc.to_dict()
Out[9]: {'new_species': 3, 'setosa': 0, 'versicolor': 2, 'virginica': 1}
from collections import defaultdict
class ClusterCounter(object):
    """
    A bare minimum cluster counter. Supports the dynamic assignment and
    retrieval of numerical clusters, starting from 0.

    Looking up a key that has not been seen before assigns it the next
    unused integer; looking up a known key returns the number it was
    given the first time.
    """

    def __init__(self):
        """Create an empty counter whose first assigned cluster is 0."""
        self._cn = 0  # next cluster number to hand out

        def new_cluster():
            # Hand out the current number, then advance the counter so the
            # next unseen key receives a fresh one.
            c = self._cn
            self._cn += 1
            return c

        # defaultdict invokes new_cluster() only when a missing key is
        # looked up, which is what makes the assignment automatic.
        self._autocounter = defaultdict(new_cluster)

    def __getitem__(self, val):
        """
        Return the cluster number for ``val``, assigning the next unused
        number first if ``val`` has not been seen before.
        """
        return self._autocounter[val]

    @property
    def nclus(self):
        """The number of clusters assigned so far (read as ``cc.nclus``)."""
        return self._cn
class ClusterCounter2(object):
    """
    A more detailed implementation of a ClusterCounter, with additional
    functionality and documentation.

    Keys looked up with ``cc[key]`` (or ``cc.get(key)``) are assigned
    consecutive integers starting at ``initial_n`` in the order they are
    first seen. Membership tests (``key in cc``) never assign a number.
    """

    def __init__(self, initial_n=0):
        """
        Initialize a cluster counter. Optional: specify a starting cluster
        number (e.g. 1).
        """
        self._initial_n = initial_n
        self._cn = initial_n  # next cluster number to hand out

        def new_cluster():
            # Hand out the current number, then advance the counter so the
            # next unseen key receives a fresh one.
            c = self._cn
            self._cn += 1
            return c

        # defaultdict invokes new_cluster() only when a missing key is
        # looked up, which is what makes the assignment automatic.
        self._autocounter = defaultdict(new_cluster)

    def __getitem__(self, val):
        """
        Get the numerical cluster assignment for the given value. If the value
        does not exist, it is assigned the next unused cluster number, which
        is then returned.
        """
        return self._autocounter[val]

    def get(self, val):
        """An alias for cc[val]."""
        return self[val]

    def __contains__(self, key):
        """
        Return True if the ClusterCounter has an assignment for the given
        key, False otherwise, *without* updating the defaultdict if not.
        """
        # `in` on a defaultdict does not trigger the default factory.
        return key in self._autocounter

    def to_dict(self):
        """Return the current state of the counter in builtin dict form."""
        return dict(self._autocounter)

    def keys(self):
        """Return the items whose clusters have already been assigned."""
        return self._autocounter.keys()

    @property
    def nclus(self):
        """The number of clusters currently observed (read as ``cc.nclus``)."""
        # _cn started at _initial_n, so the difference is the count assigned.
        return self._cn - self._initial_n

    def initial_n(self):
        """Return the number the ClusterCounter started at."""
        return self._initial_n
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment