# Gist by @Jeffrey04, created October 18, 2020 16:57
from operator import itemgetter

from sklearn.datasets import load_boston
from sklearn.ensemble import (
    BaggingRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import MinMaxScaler


def best_pick(report):
    # The scorer below reports raw MSE (lower is better), so the best fold is the
    # one with the minimum test score.
    idx = min(enumerate(report["test_score"]), key=itemgetter(1))[0]

    return dict(
        report,
        train_score_best=report["train_score"][idx],
        test_score_best=report["test_score"][idx],
    )


# Load the Boston housing dataset and rescale every feature to the [0, 1] range.
ds = load_boston()
points_x = MinMaxScaler().fit_transform(ds.data)
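
# Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the
# call above fails on current releases. A minimal sketch of a drop-in replacement,
# assuming fetch_california_housing is an acceptable substitute benchmark (the rest
# of the script is unchanged):
#
#     from sklearn.datasets import fetch_california_housing
#
#     ds = fetch_california_housing()
#     points_x = MinMaxScaler().fit_transform(ds.data)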

# Ensemble sizes (number of estimators) to compare.
sizes = (1, 5, 10, 20, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000)
print("BaggingRegressor")
print("================")
print()
print("forest_size,mse_training,mse_test")
for size in sizes:
result = best_pick(
cross_validate(
BaggingRegressor(n_estimators=size, n_jobs=-1),
points_x,
ds.target,
scoring=make_scorer(mean_squared_error),
n_jobs=1,
return_train_score=True,
return_estimator=False,
)
)
print(
f"{size},{result['train_score_best']},{result['test_score_best']}", flush=True
)
print()
print()
print("RandomForestRegressor")
print("=====================")
print()
print("forest_size,mse_training,mse_test")

# Random forest with bootstrap=False, i.e. every tree is fitted on the full
# training portion of each fold.
for size in sizes:
    result = best_pick(
        cross_validate(
            RandomForestRegressor(n_estimators=size, bootstrap=False, n_jobs=-1),
            points_x,
            ds.target,
            scoring=make_scorer(mean_squared_error),
            n_jobs=1,
            return_train_score=True,
            return_estimator=False,
        )
    )

    print(
        f"{size},{result['train_score_best']},{result['test_score_best']}", flush=True
    )
print()
print()
print("GradientBoostingRegressor")
print("=========================")
print()
print("forest_size,mse_training,mse_test")

# Gradient boosting with a deliberately small learning rate; here the
# cross-validation folds are parallelised instead of the estimator itself.
for size in sizes:
    result = best_pick(
        cross_validate(
            GradientBoostingRegressor(n_estimators=size, learning_rate=0.001),
            points_x,
            ds.target,
            scoring=make_scorer(mean_squared_error),
            n_jobs=-1,
            return_train_score=True,
            return_estimator=False,
        )
    )

    print(
        f"{size},{result['train_score_best']},{result['test_score_best']}", flush=True
    )
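
# Usage sketch: running the script (e.g. `python compare_ensembles.py`; the filename
# is illustrative) prints three CSV blocks, one per ensemble type, each with one row
# per ensemble size:
#
#     forest_size,mse_training,mse_test
#     1,<training MSE of best fold>,<test MSE of best fold>
#     ...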