Skip to content

Instantly share code, notes, and snippets.

@ogrisel
Created September 7, 2011 22:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ogrisel/1202006 to your computer and use it in GitHub Desktop.
Save ogrisel/1202006 to your computer and use it in GitHub Desktop.
V-Measure and adjustment for chance

This is an experiment to highlight the dependency of the V-Measure value on the number of clusters of 2 independent uniform labelings for a finite number of samples.

Intuitively it seems that, for a finite number of samples, the V-Measure is a victim of some kind of birthday paradox that naive users might not be aware of.

Even if the maximum number of clusters considered (e.g. 10) is small with respect to the number of samples (e.g. 5000), the V-Measure of 2 independent uniform labelings still increases noticeably with the number of clusters in at least one of the labelings.

Careful counting of the number of possible combinations is required to come up with an adjusted-for-chance variant of the V-Measure that would counter this effect.

"""Script demonstrating the lack of adjustment for randomness of V-Measure
Here we make 2 independent clustering labels labels_a and labels_b for various
values of n_samples and k, the number of clusters in both the a and b labelings.
"""
import numpy as np
from sklearn.metrics import v_measure_score
def v_measures_same_k(n_samples=100, k_range=range(2, 100), n_runs=10,
                      seed=42, score_func=None):
    """Score pairs of independent random labelings sharing a cluster count.

    For every ``k`` in ``k_range`` and for every run, two labelings of
    ``n_samples`` points are drawn independently and uniformly from
    ``{0, ..., k - 1}`` and compared with ``score_func``.

    Parameters
    ----------
    n_samples : int
        Number of samples in each labeling.
    k_range : sized iterable of int
        Numbers of clusters to evaluate; must support ``len()``.
    n_runs : int
        Number of independent repetitions per value of ``k``.
    seed : int
        Seed of the single ``RandomState`` used for all the draws.
    score_func : callable or None
        Called as ``score_func(labels_a, labels_b)`` and expected to return
        a number. When None (the default), ``v_measure_score`` is used.

    Returns
    -------
    scores : ndarray of shape (len(k_range), n_runs)
        ``scores[j, i]`` is the score obtained for the ``j``-th value of
        ``k_range`` during run ``i``.
    """
    if score_func is None:
        score_func = v_measure_score

    # RandomState.random_integers (inclusive upper bound) is deprecated;
    # randint excludes ``high``, so ``high=k`` keeps labels in [0, k - 1].
    random_labels = np.random.RandomState(seed).randint

    scores = np.zeros((len(k_range), n_runs))
    for i in range(n_runs):
        for j, k in enumerate(k_range):
            labels_a = random_labels(low=0, high=k, size=n_samples)
            labels_b = random_labels(low=0, high=k, size=n_samples)
            scores[j, i] = score_func(labels_a, labels_b)
    return scores
def v_measures_fixed_k_a(n_samples=100, k_a=10, k_b_range=range(2, 100),
                         n_runs=10, seed=42, score_func=None):
    """Score independent random labelings, one with a fixed cluster count.

    For every ``k_b`` in ``k_b_range`` and for every run, labeling ``a`` is
    drawn uniformly from ``{0, ..., k_a - 1}`` and labeling ``b`` is drawn
    independently and uniformly from ``{0, ..., k_b - 1}``; the two are then
    compared with ``score_func``.

    Parameters
    ----------
    n_samples : int
        Number of samples in each labeling.
    k_a : int
        Fixed number of clusters of labeling ``a``.
    k_b_range : sized iterable of int
        Numbers of clusters to evaluate for labeling ``b``; must support
        ``len()``.
    n_runs : int
        Number of independent repetitions per value of ``k_b``.
    seed : int
        Seed of the single ``RandomState`` used for all the draws.
    score_func : callable or None
        Called as ``score_func(labels_a, labels_b)`` and expected to return
        a number. When None (the default), ``v_measure_score`` is used.

    Returns
    -------
    scores : ndarray of shape (len(k_b_range), n_runs)
        ``scores[j, i]`` is the score obtained for the ``j``-th value of
        ``k_b_range`` during run ``i``.
    """
    if score_func is None:
        score_func = v_measure_score

    # RandomState.random_integers (inclusive upper bound) is deprecated;
    # randint excludes ``high``, so passing the cluster count itself keeps
    # the labels in [0, count - 1].
    random_labels = np.random.RandomState(seed).randint

    scores = np.zeros((len(k_b_range), n_runs))
    for i in range(n_runs):
        for j, k_b in enumerate(k_b_range):
            labels_a = random_labels(low=0, high=k_a, size=n_samples)
            labels_b = random_labels(low=0, high=k_b, size=n_samples)
            scores[j, i] = score_func(labels_a, labels_b)
    return scores
if __name__ == '__main__':
    # pyplot is the supported plotting interface; the pylab namespace the
    # script used before is discouraged by Matplotlib.
    import matplotlib.pyplot as plt

    n_samples = 100
    # Go all the way up to one cluster per sample.
    k_range = range(2, n_samples + 1)

    # Experiment 1: both labelings use the same number of clusters.
    scores = v_measures_same_k(n_samples=n_samples, k_range=k_range)
    plt.figure()
    # Mean over the runs, with the standard deviation as error bars.
    plt.errorbar(k_range, scores.mean(axis=1), yerr=scores.std(axis=1))
    plt.title("V-Measures for 2 uniform labelings with various centers\n"
              "and fixed total number of samples to label %d." % n_samples)
    plt.xlabel("Number of clusters")
    plt.ylabel("V-Measure")
    plt.show()

    # Experiment 2: labeling a keeps k_a clusters while labeling b varies.
    k_a = 20
    scores = v_measures_fixed_k_a(n_samples=n_samples, k_a=k_a,
                                  k_b_range=k_range)
    plt.figure()
    plt.errorbar(k_range, scores.mean(axis=1), yerr=scores.std(axis=1))
    plt.title("V-Measures for 2 uniform labelings, one with various centers\n"
              "and fixed number of centers %d for the other and\n"
              "total number of samples to label %d." % (k_a, n_samples))
    plt.xlabel("Number of clusters of the varying labeling")
    plt.ylabel("V-Measure")
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment