Benchmarking `sklearn.semi_supervised` `n_iter_` as a function of model and data characteristics
import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation, LabelSpreading

# Benchmark of how many iterations (`n_iter_`) a semi-supervised estimator
# runs when a random fraction `p` of the training labels is hidden.
#
# Output: one tab-separated row per (dataset, model, p) combination:
#   n_samples, p, model.alpha, mean(n_iter_), std(n_iter_)

# Number of independent random maskings averaged for each configuration.
N_REPEATS = 20

# Seeded generator so that repeated runs draw the same masks and therefore
# print the same numbers; the original used the unseeded global generator.
rng = np.random.RandomState(0)

# Alternative data source (disabled): synthetic datasets of growing size.
###for n_samples in [20, 200, 2000, 20000]:
###    X, y = datasets.make_classification(n_samples=n_samples, n_classes=3, n_informative=3)
for (X, y) in [datasets.load_iris(return_X_y=True)]:
    for model in [LabelPropagation(max_iter=1000),
                  # Alternative models (disabled):
                  #LabelSpreading(alpha=0.01),
                  #LabelSpreading(alpha=0.1),
                  #LabelSpreading(alpha=0.3)
                  ]:
        for p in [.1, .15, .2, .25, .5, .75]:
            n_iter = []
            for _ in range(N_REPEATS):
                # Each sample is independently selected for masking with
                # probability p.
                random_unlabeled_points = rng.rand(len(y)) < p
                # Work on a copy so the true labels stay intact for the
                # next repetition.
                labels = np.copy(y)
                labels[random_unlabeled_points] = -1
                model.fit(X, labels)
                n_iter.append(model.n_iter_)
            # Summarise the repetitions for this (dataset, model, p).
            print(len(y), p, model.alpha, np.mean(n_iter), np.std(n_iter), sep='\t')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment