Skip to content

Instantly share code, notes, and snippets.

@Katsumata420
Last active July 5, 2022 03:12
Show Gist options
  • Save Katsumata420/9a312d75e2b7d78b24d9defb4eeb0033 to your computer and use it in GitHub Desktop.
Save Katsumata420/9a312d75e2b7d78b24d9defb4eeb0033 to your computer and use it in GitHub Desktop.
Optuna with mlflow integration from this example (https://github.com/optuna/optuna-examples/blob/main/sklearn/sklearn_simple.py)
"""
Optuna example that optimizes a classifier configuration for the Iris dataset using sklearn.
In this example, we optimize a classifier configuration for the Iris dataset. Classifiers are from
scikit-learn. We optimize both the choice of classifier (among SVC and RandomForest) and their
hyperparameters. Every trial is also recorded as an MLflow run.
"""
import mlflow
import optuna
from optuna.integration.mlflow import MLflowCallback, RUN_ID_ATTRIBUTE_KEY
import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm
# Only show Optuna log messages at WARNING level or above.
optuna.logging.set_verbosity(optuna.logging.WARNING)
# FYI: Objective functions can take additional arguments
# (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).
class OptunaObjective:
    """Callable Optuna objective that records every trial as one MLflow run.

    For each trial an MLflow run named after the trial number is created in
    the configured experiment, a classifier (SVC or RandomForest) is sampled
    and cross-validated on the Iris dataset, and the averaged cross-validation
    results are logged to that run.

    Args:
        experiment_id: Id of the MLflow experiment the runs are created in.
    """

    def __init__(self, experiment_id: str) -> None:
        self.experiment_id = experiment_id

    @staticmethod
    def _suggest_classifier(trial: "optuna.trial.Trial"):
        """Sample the classifier type and its hyperparameter from ``trial``."""
        classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
        if classifier_name == "SVC":
            svc_c = trial.suggest_float("svc_c", 1e-10, 1e10, log=True)
            return sklearn.svm.SVC(C=svc_c, gamma="auto")

        rf_max_depth = trial.suggest_int("rf_max_depth", 2, 32, log=True)
        return sklearn.ensemble.RandomForestClassifier(
            max_depth=rf_max_depth, n_estimators=10
        )

    def __call__(self, trial: "optuna.trial.Trial") -> float:
        """Evaluate one trial and return its mean cross-validated accuracy.

        Args:
            trial: The Optuna trial to sample hyperparameters from.

        Returns:
            Mean of the ``test_accuracy`` scores over the 3 CV folds.
        """
        # Create the run only to obtain its id. It is closed again before
        # training so that it can be reopened by id once the scores exist.
        with mlflow.start_run(
            experiment_id=self.experiment_id, run_name=str(trial.number)
        ) as active_run:
            run_id = active_run.info.run_id

        # Store the run id on the trial under the key exported by Optuna's
        # MLflow integration, so the callback can attach to this same run.
        # NOTE(review): Trial.set_system_attr appears to be deprecated in newer
        # Optuna releases -- confirm against the installed version.
        trial.set_system_attr(RUN_ID_ATTRIBUTE_KEY, run_id)

        iris = sklearn.datasets.load_iris()
        x, y = iris.data, iris.target
        classifier_obj = self._suggest_classifier(trial)

        # https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
        metric = {"f": "f1_macro", "accuracy": "accuracy"}
        cv_results = sklearn.model_selection.cross_validate(
            classifier_obj, x, y, n_jobs=-1, cv=3, scoring=metric
        )
        # Collapse the per-fold arrays to one mean value per result key.
        score = {key: values.mean() for key, values in cv_results.items()}

        # Reopen the run created above and attach every averaged entry to it.
        with mlflow.start_run(experiment_id=self.experiment_id, run_id=run_id):
            mlflow.log_metrics(score)

        return score["test_accuracy"]
def _get_or_create_experiment(name):
    """Return the id of the MLflow experiment called ``name``, creating it if absent."""
    # mlflow.create_experiment fails when the name is already taken, which
    # would make every run of this script after the first one crash.
    existing = mlflow.get_experiment_by_name(name)
    if existing is not None:
        return existing.experiment_id
    return mlflow.create_experiment(name=name)


def main():
    """Run a 100-trial study on the objective and print the best trial."""
    # Experiment name under which the runs are stored in MLflow.
    experiment_id = _get_or_create_experiment("optuna-experiment")
    objective = OptunaObjective(experiment_id)
    mlflow_kwargs = {"experiment_id": experiment_id}
    # metric_name is the metric name stored in MLflow.
    # The MLflowCallback usage below requires Optuna v3.0.0 or later.
    mlflow_call = MLflowCallback(
        metric_name="hoge", create_experiment=False, mlflow_kwargs=mlflow_kwargs
    )
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100, callbacks=[mlflow_call])
    print(study.best_trial)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment