@ogrisel
Created June 2, 2020 15:03
@pytest.mark.parametrize("loss", ['huber', 'ls', 'lad', 'quantile'])
@pytest.mark.parametrize("use_sample_weight", [False, True])
def test_regressor_train_loss_convergence(loss, use_sample_weight):
rng = np.random.RandomState(42)
n_samples, n_features = 30, 5
n_estimators = 300
# Make random data (without duplicated samples) to make sure
# it's possible to build an invertible (overfitting) mapping
# from X to y that therefore should lead to a regression loss
# of zero if n_estimators is large enough.
X = rng.randn(n_samples, n_features)
y = rng.randn(n_samples)
if use_sample_weight:
sample_weight = rng.uniform(0, 10, size=n_samples)
sample_weight[sample_weight < 2] = 0
else:
sample_weight = None
gbr = GradientBoostingRegressor(
learning_rate=0.1,
max_depth=3,
loss=loss,
n_estimators=n_estimators,
n_iter_no_change=None, # make sure early stopping is disabled
)
gbr.fit(X, y, sample_weight=sample_weight)
train_loss = gbr.loss_(y, gbr._raw_predict(X),
sample_weight=sample_weight)
assert len(gbr.train_score_) == n_estimators
# assert gbr.train_score_[-1] == pytest.approx(train_loss)
assert train_loss < 1e-6, gbr.train_score_
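A minimal standalone sketch (not part of the gist) to eyeball the same convergence claim outside of pytest, assuming a scikit-learn version contemporary with this gist (~0.23), where loss='ls' and the train_score_ attribute are available:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.RandomState(42)
X = rng.randn(30, 5)
y = rng.randn(30)

gbr = GradientBoostingRegressor(
    loss='ls', learning_rate=0.1, max_depth=3,
    n_estimators=300, n_iter_no_change=None,
).fit(X, y)

# train_score_[i] is the training loss recorded after fitting stage i;
# with enough stages and no duplicated samples it should decay towards 0.
print(gbr.train_score_[::50])  # every 50th stage
print("final train score:", gbr.train_score_[-1])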
@ogrisel (Author) commented Jun 2, 2020
train_loss is probably wrong, but I do not know why. Maybe in the meantime we should just check:

assert gbr.train_score_[-1] < 1e-6, gbr.train_score_
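A hedged debugging sketch (not from the gist) to localize the discrepancy the comment points at, reusing gbr, X, y and sample_weight from the test body above; loss_ and _raw_predict are internals of scikit-learn ~0.23 and may change or disappear in other versions:

# Compare the loss recorded during fitting with the same loss recomputed
# from the final raw predictions.
recorded = gbr.train_score_[-1]
recomputed = gbr.loss_(y, gbr._raw_predict(X), sample_weight=sample_weight)
print("recorded during fit: ", recorded)
print("recomputed after fit:", recomputed)
# If the two disagree, one hypothesis (unverified) is loss-specific state,
# e.g. the huber delta being re-estimated on each call, or sample_weight
# handling in the recomputation, rather than the boosting stages themselves.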
