# Gist by @yudhiesh (gist id 346367b6b75fc97f8ab0e15c11101ec7), last active March 7, 2022 05:32.
import pandas as pd
import numpy as np
import lightgbm as lgb
import optuna
from optuna.integration import LightGBMPruningCallback
from sklearn.metrics import log_loss, roc_auc_score
def get_features_labels(df, target_columns):
    """
    Split a dataframe into a feature frame and its label column(s).

    Args:
        df: DataFrame holding both the features and the label column(s).
        target_columns: Name of the label column, or a list-like of names
            when there are several label columns. A tuple is treated as a
            single column key rather than as a collection of names.

    Returns:
        A tuple ``(X, y)``. ``X`` contains every column of ``df`` that is not
        a label column. ``y`` is ``df.loc[:, target_columns]``: typically a
        Series for a single name and a DataFrame for a list-like of names.

    Raises:
        KeyError: If a requested label column is not present in ``df``.
    """
    several_targets = pd.api.types.is_list_like(target_columns) and not isinstance(
        target_columns, tuple
    )
    if several_targets:
        label_selector = list(target_columns)
        excluded = set(label_selector)
    else:
        label_selector = target_columns
        excluded = {target_columns}
    # Comparing each column name against a whole list is always "not equal",
    # which would leave the labels inside X; test membership instead.
    feature_columns = [column for column in df.columns if column not in excluded]
    X = df.loc[:, feature_columns]
    y = df.loc[:, label_selector]
    return X, y
def get_train_valid(train, valid, target_columns="target"):
    """
    Extract the features and labels from the train and valid datasets.

    Args:
        train: DataFrame with the training rows (features plus label column).
        valid: DataFrame with the validation rows (features plus label column).
        target_columns: Label column forwarded to ``get_features_labels``.
            The ``"target"`` default is an assumed naming convention; pass the
            real column name when the label is called something else.

    Returns:
        The tuple ``(X_train, y_train, X_valid, y_valid)``.
    """
    # get_features_labels requires the label column; calling it with only the
    # dataframe raises TypeError, so the column is forwarded explicitly.
    X_train, y_train = get_features_labels(train, target_columns)
    X_valid, y_valid = get_features_labels(valid, target_columns)
    return (X_train, y_train, X_valid, y_valid)
def get_study_results(study, objective, name):
    """
    Print the best trial of an Optuna study and return the best model.

    Args:
        study: Study whose ``best_trial.params`` are printed.
        objective: Objective instance whose ``best_booster`` is returned.
        name: Label used in the printed header.

    Returns:
        ``objective.best_booster``.
    """
    print(f"Best trial - {name}:")
    best_params = study.best_trial.params
    print(" Params: ")
    for param_name in best_params:
        print(f" {param_name}: {best_params[param_name]}")
    winner = objective.best_booster
    print(f"Best validation score: {winner.best_score_}")
    return winner
def run_experiment(n_trials, train, valid, name):
    """
    Tune hyperparameters on one train/valid pair with an Optuna study.

    Args:
        n_trials: Number of trials passed to ``study.optimize``.
        train: Training dataset handed to ``Objective``.
        valid: Validation dataset handed to ``Objective``.
        name: Experiment label; only used in the printed banner.

    Returns:
        The tuple ``(study, objective)`` after optimization has finished.
    """
    print(f"Running experiment for : {name.title()}")
    tuner = Objective(train=train, valid=valid)
    median_pruner = optuna.pruners.MedianPruner(n_warmup_steps=10)
    # The objective returns a ROC AUC, so the study maximizes it.
    search = optuna.create_study(pruner=median_pruner, direction="maximize")
    # The objective's own callback runs after each trial to keep the best model.
    search.optimize(tuner, n_trials=n_trials, callbacks=[tuner.callback])
    return search, tuner
class Objective:
    """
    Optuna objective that tunes a LightGBM classifier and remembers the best model.

    Calling an instance with a trial fits one ``LGBMClassifier`` and returns its
    ROC AUC on the validation data. ``callback`` is meant to be passed to
    ``study.optimize(..., callbacks=[...])`` so that the model of the best
    trial is kept in ``best_booster``.
    """

    def __init__(self, train, valid):
        """Store the datasets and split each one into features and labels."""
        self.best_booster = None  # model of the best trial seen so far
        self._booster = None  # model fitted by the most recent completed call
        self.train = train
        self.valid = valid
        self.X_train, self.y_train, self.X_valid, self.y_valid = get_train_valid(
            train=self.train,
            valid=self.valid,
        )

    def __call__(self, trial):
        """
        Fit one model with hyperparameters suggested by ``trial``.

        Returns:
            The ROC AUC of the fitted model on the validation data.
        """
        param_grid = {
            # Single-choice categoricals keep the value fixed across trials.
            "n_estimators": trial.suggest_categorical("n_estimators", [10000]),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
            "num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
            "max_depth": trial.suggest_int("max_depth", 3, 12),
            "min_data_in_leaf": trial.suggest_int(
                "min_data_in_leaf", 200, 10000, step=100
            ),
            "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
            "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
            "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
            "bagging_fraction": trial.suggest_float(
                "bagging_fraction", 0.2, 0.95, step=0.1
            ),
            "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
            "feature_fraction": trial.suggest_float(
                "feature_fraction", 0.2, 0.95, step=0.1
            ),
        }
        fit_callbacks = [LightGBMPruningCallback(trial, "auc")]
        fit_kwargs = {}
        # LightGBM 4.0 removed the ``early_stopping_rounds`` argument of
        # ``fit``; the callback form is the supported replacement. Releases
        # without the callback still receive the keyword.
        if hasattr(lgb, "early_stopping"):
            fit_callbacks.append(lgb.early_stopping(stopping_rounds=100))
        else:
            fit_kwargs["early_stopping_rounds"] = 100
        model = lgb.LGBMClassifier(objective="binary", **param_grid)
        model.fit(
            self.X_train,
            self.y_train,
            eval_set=[(self.X_valid, self.y_valid)],
            eval_metric=["auc", "binary_logloss"],
            callbacks=fit_callbacks,
            **fit_kwargs,
        )
        # Only reached when the trial was not pruned during fitting.
        self._booster = model
        preds = model.predict_proba(self.X_valid)[:, 1]
        roc_auc = roc_auc_score(self.y_valid, preds)
        return roc_auc

    def callback(self, study, trial):
        """Keep the latest model when ``trial`` is the study's best trial."""
        try:
            best_trial = study.best_trial
        except ValueError:
            # Optuna raises ValueError while no trial has completed yet
            # (for example when every trial so far was pruned).
            return
        # Trial numbers identify a trial within a study, so comparing them is
        # enough and avoids comparing whole trial objects.
        if best_trial.number == trial.number:
            self.best_booster = self._booster
# Example usage
# `data1_train` and `data1_valid` are not defined in the visible part of this
# file; they must exist before these lines run. `name` only labels the output.
study1, objective1 = run_experiment(
    n_trials=100,
    train=data1_train,
    valid=data1_valid,
    name="Data1",
)
# Print the best trial of that study and fetch the model kept for it.
best_model1 = get_study_results(
    study=study1,
    objective=objective1,
    name="Data1",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment