hyperopt tuning: staged hyperparameter search for an XGBoost classifier with hyperopt
from typing import Any, Callable, Dict, Union

import pandas as pd
import xgboost as xgb
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn.metrics import accuracy_score, roc_auc_score
def hyperparameter_tuning(space: Dict[str, Union[float, int]],
                          X_train: pd.DataFrame, y_train: pd.Series,
                          X_test: pd.DataFrame, y_test: pd.Series,
                          early_stopping_rounds: int = 50,
                          metric: Callable = accuracy_score) -> Dict[str, Any]:
"""
Perform hyperparameter tuning for an XGBoost classifier.
This function takes a dictionary of hyperparameters, training
and test data, and an optional value for early stopping rounds,
and returns a dictionary with the loss and model resulting from
the tuning process. The model is trained using the training
data and evaluated on the test data. The loss is computed as
the negative of the accuracy score.
Parameters
----------
space : Dict[str, Union[float, int]]
A dictionary of hyperparameters for the XGBoost classifier.
X_train : pd.DataFrame
The training data.
y_train : pd.Series
The training target.
X_test : pd.DataFrame
The test data.
y_test : pd.Series
The test target.
early_stopping_rounds : int, optional
The number of early stopping rounds to use. The default value
is 50.
metric : callable
Metric to maximize. Default is accuracy
Returns
-------
Dict[str, Any]
A dictionary with the loss and model resulting from the
tuning process. The loss is a float, and the model is an
XGBoost classifier.
"""
    # hyperopt samples every value as a float; cast the ones XGBoost
    # expects as integers
    int_vals = ['max_depth', 'reg_alpha']
    space = {k: (int(val) if k in int_vals else val)
             for k, val in space.items()}
    space['early_stopping_rounds'] = early_stopping_rounds
    model = xgb.XGBClassifier(**space)
    evaluation = [(X_train, y_train),
                  (X_test, y_test)]
    model.fit(X_train, y_train,
              eval_set=evaluation,
              verbose=False)
    pred = model.predict(X_test)
    score = metric(y_test, pred)
    # hyperopt minimizes the loss, so negate the score to maximize the metric
    return {'loss': -score, 'status': STATUS_OK, 'model': model}
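

# A minimal sketch of a single evaluation (assumes X_kag_train, y_kag_train,
# X_kag_test, and y_kag_test are predefined train/test splits, as in the loop
# below): the returned dict carries the negated metric under 'loss' plus the
# fitted model.
result = hyperparameter_tuning({'max_depth': 5, 'learning_rate': 0.1,
                                'random_state': 42},
                               X_kag_train, y_kag_train,
                               X_kag_test, y_kag_test)
print(result['loss'])
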
params = {'random_state': 42}
# Tune in stages: tree complexity, then stochastic sampling, then
# regularization, then the boosting learning rate.
rounds = [{'max_depth': hp.quniform('max_depth', 1, 9, 1),              # tree
           'min_child_weight': hp.loguniform('min_child_weight', -2, 3)},
          {'subsample': hp.uniform('subsample', 0.5, 1),                 # stochastic
           'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1)},
          {'gamma': hp.loguniform('gamma', -10, 10)},                    # regularization
          {'learning_rate': hp.loguniform('learning_rate', -7, 0)},      # boosting
          ]
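
# Optional check (not part of the tuning itself): hyperopt can draw samples
# from a search space, which shows what concrete values these distributions
# produce. For instance, loguniform('learning_rate', -7, 0) yields values
# between roughly exp(-7) ~= 0.001 and exp(0) = 1.
from hyperopt.pyll import stochastic
for stage in rounds:
    print(stochastic.sample(stage))
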
for round in rounds:
    # fix the parameters found so far and search over this stage's space
    params = {**params, **round}
    trials = Trials()
    best = fmin(fn=lambda space: hyperparameter_tuning(
                    space, X_kag_train, y_kag_train, X_kag_test, y_kag_test,
                    metric=accuracy_score),
                space=params,
                algo=tpe.suggest,
                max_evals=40,
                trials=trials,
                )
    # fold the best values from this stage into the fixed parameters
    params = {**params, **best}
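
# After the staged search, `params` holds the best value found for each
# hyperparameter. A hedged sketch of one possible final step (same assumed
# X_kag_* splits): cast the integer-valued parameters, fit a fresh classifier,
# and score it on the test split.
best_params = {k: (int(v) if k in ('max_depth', 'reg_alpha') else v)
               for k, v in params.items()}
final_model = xgb.XGBClassifier(**best_params,
                                early_stopping_rounds=50)
final_model.fit(X_kag_train, y_kag_train,
                eval_set=[(X_kag_test, y_kag_test)],
                verbose=False)
print(accuracy_score(y_kag_test, final_model.predict(X_kag_test)))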