-
-
Save Jeffrey04/1a5355455a1fc25470126f7a415041b3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from operator import itemgetter | |
from sklearn.datasets import load_boston | |
from sklearn.ensemble import ( | |
BaggingRegressor, | |
GradientBoostingRegressor, | |
RandomForestRegressor, | |
) | |
from sklearn.metrics import make_scorer, mean_squared_error | |
from sklearn.model_selection import cross_validate | |
from sklearn.preprocessing import MinMaxScaler | |
def best_pick(report):
    """Augment a cross-validation report with the scores of its best fold.

    The "best" fold is the one with the lowest test score (the first such
    fold on ties), which is the appropriate direction for error metrics
    where smaller is better.

    Args:
        report: Mapping holding index-aligned ``"train_score"`` and
            ``"test_score"`` sequences, one entry per fold.

    Returns:
        A new ``dict`` containing every entry of ``report`` plus
        ``"train_score_best"`` and ``"test_score_best"``, the train and test
        scores of the selected fold. ``report`` itself is not modified.

    Raises:
        ValueError: If ``report["test_score"]`` is empty.
    """
    # Index of the fold with the smallest test score.
    idx = min(enumerate(report["test_score"]), key=itemgetter(1))[0]
    return dict(
        report,
        train_score_best=report["train_score"][idx],
        # Read from "test_score": the previous code read "train_score" here,
        # so the reported best test score was really the train score.
        test_score_best=report["test_score"][idx],
    )
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this script presumably needs an older scikit-learn release - confirm
# the pinned version before running.
ds = load_boston()
# Min-max scale the features once; every benchmark below reuses this matrix.
points_x = MinMaxScaler().fit_transform(ds.data)
# Ensemble sizes (n_estimators) to benchmark.
sizes = (1, 5, 10, 20, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000)
# The scorer does not depend on the estimator or the size, so build it once.
mse_scorer = make_scorer(mean_squared_error)


def _print_report(title, make_estimator, cv_jobs):
    """Print a CSV table of best-fold train/test MSE for each ensemble size.

    Args:
        title: Section heading; it is underlined with ``=`` characters.
        make_estimator: Callable taking the ensemble size and returning an
            unfitted regressor configured with that size.
        cv_jobs: Value passed as ``n_jobs`` to ``cross_validate``.
    """
    print(title)
    print("=" * len(title))
    print()
    print("forest_size,mse_training,mse_test")
    for size in sizes:
        result = best_pick(
            cross_validate(
                make_estimator(size),
                points_x,
                ds.target,
                scoring=mse_scorer,
                n_jobs=cv_jobs,
                return_train_score=True,
                return_estimator=False,
            )
        )
        # Flush each row so progress is visible while larger sizes still run.
        print(
            f"{size},{result['train_score_best']},{result['test_score_best']}",
            flush=True,
        )


# Bagging and random forest parallelise inside the estimator (n_jobs=-1), so
# their cross-validation runs with a single job.
_print_report(
    "BaggingRegressor",
    lambda size: BaggingRegressor(n_estimators=size, n_jobs=-1),
    cv_jobs=1,
)
print()
print()
_print_report(
    "RandomForestRegressor",
    lambda size: RandomForestRegressor(
        n_estimators=size, bootstrap=False, n_jobs=-1
    ),
    cv_jobs=1,
)
print()
print()
# The gradient boosting estimator is given no n_jobs here; parallelism is
# applied across the cross-validation folds instead.
_print_report(
    "GradientBoostingRegressor",
    lambda size: GradientBoostingRegressor(n_estimators=size, learning_rate=0.001),
    cv_jobs=-1,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment