Full sample code for MLflow example: a small random search over RandomForestClassifier hyperparameters on the iris dataset, logging parameters, cross-validation metrics, and the fitted model to MLflow on each run.
import os

import numpy as np
from scipy.stats import uniform
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_validate, ParameterSampler

import mlflow
import mlflow.sklearn

####################### Amend configurations here ########################
# Credentials
GOOGLE_APPLICATION_CREDENTIALS = <GOOGLE_APPLICATION_CREDENTIALS>  # path to service account json file
MLFLOW_TRACKING_USERNAME = <MLFLOW_TRACKING_USERNAME>  # username
MLFLOW_TRACKING_PASSWORD = <MLFLOW_TRACKING_PASSWORD>  # password

experiment_name = "Experiment 1"  # amend experiment name accordingly
tracking_uri = './mlruns'  # or external IP, e.g., "http://35.225.50.9:80"

# Hyperparameter distributions for our model
hyperparams = {'max_depth': range(5, 21),
               'max_samples': uniform(loc=0.5, scale=0.5),
               'max_features': [None, 'sqrt', 'log2']}

# Other fixed parameters
params = {'cv_folds': 3,
          'n_iter': 6}

# Metrics to score and log (named so as not to shadow the sklearn.metrics module)
scoring_metrics = ['accuracy', 'f1_macro']  # run sorted(metrics.SCORERS.keys()) to see the list of available metrics
########################################################################
# Set environment variables (used for authentication when logging to a remote tracking server)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GOOGLE_APPLICATION_CREDENTIALS
os.environ['MLFLOW_TRACKING_USERNAME'] = MLFLOW_TRACKING_USERNAME
os.environ['MLFLOW_TRACKING_PASSWORD'] = MLFLOW_TRACKING_PASSWORD

def data_processing():
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = data_processing()
# Point MLflow at the tracking store before creating/selecting the experiment
mlflow.set_tracking_uri(tracking_uri)

# Set experiment name
mlflow.set_experiment(experiment_name)
experiment = mlflow.get_experiment_by_name(experiment_name)
# Sample n_iter hyperparameter combinations from the distributions above
param_list = list(ParameterSampler(hyperparams, n_iter=params['n_iter'], random_state=0))

for run in range(params['n_iter']):
    run_hyperparams = param_list[run]
    with mlflow.start_run(experiment_id=experiment.experiment_id):
        clf = RandomForestClassifier(max_depth=run_hyperparams['max_depth'],
                                     max_samples=run_hyperparams['max_samples'],
                                     max_features=run_hyperparams['max_features'],
                                     random_state=0)
        clf.fit(X_train, y_train)
        scores = cross_validate(clf, X_train, y_train,
                                cv=params['cv_folds'],
                                scoring=scoring_metrics)
        metrics_dict = {metric: np.mean(scores['test_' + metric])
                        for metric in scoring_metrics}

        # Log model hyperparameters
        mlflow.log_params(run_hyperparams)

        # Log mean cross-validation metrics
        mlflow.log_metrics(metrics_dict)

        # Log the fitted model
        mlflow.sklearn.log_model(clf, "model")
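
# As a quick follow-up, a minimal sketch (not part of the original gist) for
# querying the logged runs and reloading the best model. It assumes the
# `experiment` object, `X_test`/`y_test`, and the 'accuracy' metric from the
# script above, and that the runs were logged to the same tracking URI.
runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id],
                          order_by=["metrics.accuracy DESC"])
best_run_id = runs.loc[0, 'run_id']
best_model = mlflow.sklearn.load_model(f"runs:/{best_run_id}/model")
print(best_model.score(X_test, y_test))  # held-out accuracy of the best run's model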