Skip to content

Instantly share code, notes, and snippets.

@ImadDabbura
Created August 3, 2018 20:33
Show Gist options
  • Save ImadDabbura/467213acb7d445c1bcad832c1017df0d to your computer and use it in GitHub Desktop.
Save ImadDabbura/467213acb7d445c1bcad832c1017df0d to your computer and use it in GitHub Desktop.
# Build random forest classifier
#
# For each training set below, tune a StandardScaler + RandomForestClassifier
# pipeline with a 10-fold grid search scored by F1, then print the best
# hyperparameters and the best cross-validated score.
# NOTE(review): X_train*/y_train* are defined elsewhere; presumably they are
# the original, upsampled and downsampled versions of the same training
# split -- confirm against the code that builds them.
methods_data = {
    "Original": (X_train, y_train),
    "Upsampled": (X_train_u, y_train_u),
    "Downsampled": (X_train_d, y_train_d),
}

# The search space does not depend on the training set, so build it once
# instead of on every iteration. Keys follow the pipeline convention
# "<step name>__<parameter>".
hyperparam_grid = {
    "randomforestclassifier__n_estimators": [10, 50, 100, 500],
    "randomforestclassifier__max_features": ["sqrt", "log2", 0.4, 0.5],
    "randomforestclassifier__min_samples_leaf": [1, 3, 5],
    "randomforestclassifier__criterion": ["gini", "entropy"],
}

for method, (X_method, y_method) in methods_data.items():
    # A fresh pipeline per training set. n_estimators=500 is only a starting
    # value: the grid above sets n_estimators for every candidate.
    pip_rf = make_pipeline(
        StandardScaler(),
        RandomForestClassifier(
            n_estimators=500,
            class_weight="balanced",
            random_state=123,
        ),
    )
    gs_rf = GridSearchCV(
        pip_rf,
        hyperparam_grid,
        scoring="f1",
        cv=10,
        n_jobs=-1,
    )
    gs_rf.fit(X_method, y_method)

    print(f"\033[1m\033[0mThe best hyperparameters for {method} data:")
    for hyperparam, value in gs_rf.best_params_.items():
        # Show only the parameter name, without the "<step name>__" prefix.
        print(hyperparam.split("__", 1)[-1], ": ", value)
    # The trailing "\033[0m" resets the bold/blue styling so it does not leak
    # into whatever is printed after the last iteration.
    print(
        f"\033[1m\033[94mBest 10-folds CV f1-score: "
        f"{gs_rf.best_score_ * 100:.2f}%.\033[0m"
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment