Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Demonstrate strange behavior of sklearn's RandomForestRegressor with multi-output targets
import numpy as np
from sklearn.ensemble import RandomForestRegressor
# Simulate data: 12 training rows, 6 test rows, 5 features, 3 target columns.
# Seed NumPy's global RNG so the simulated data is the same on every run;
# the estimator is already pinned via random_state=42, but without this the
# inputs (and therefore the whole demonstration) changed between runs.
np.random.seed(42)
n_train, n_test, n_features, n_outputs = 12, 6, 5, 3
features = np.random.random((n_train, n_features))
targets = np.random.random((n_train, n_outputs))
test_features = np.random.random((n_test, n_features))

# Baseline: fit on the raw (un-normalized) targets and check whether each
# row of multi-output predictions sums to 1.
rfr = RandomForestRegressor(random_state=42)
rfr.fit(features, targets)
preds = rfr.predict(features)
print('preds sum to 1?')
# Comparing against the scalar 1.0 broadcasts over every row, so the check
# no longer depends on a hard-coded row count.
print(np.allclose(preds.sum(axis=1), 1.0))

# Normalize targets so that every row sums to 1, then refit and repeat the
# same check on both the training rows and the unseen test rows.
# keepdims=True keeps the row sums as a column so the division broadcasts
# row-wise.
norm_targets = targets / targets.sum(axis=1, keepdims=True)
rfr.fit(features, norm_targets)
preds = rfr.predict(features)
te_preds = rfr.predict(test_features)
print('predictions all sum to 1?')
print(np.allclose(preds.sum(axis=1), 1.0))
print('test predictions all sum to 1?')
print(np.allclose(te_preds.sum(axis=1), 1.0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment