# pkgandhi/optimize.py

## optimize.py
# Importing the Packages:
import joblib
import optuna
import pandas as pd
from sklearn import datasets
from sklearn import ensemble
from sklearn import linear_model
from sklearn import model_selection

# Diabetes data loaded once at import time and shared by every trial:
# X holds the features as a pandas frame, y holds the target.
# NOTE(review): load_diabetes is a regression dataset, while objective() fits
# classifiers on it -- confirm this pairing is intended.
X, y = datasets.load_diabetes(as_frame=True, return_X_y=True)

#Step 1. Define an objective function to be maximized.
def objective(trial):
    """Build the classifier chosen by ``trial`` and return its mean CV score.

    The trial first picks a classifier family ("LogReg" or "RandomForest")
    and then samples only the hyperparameters belonging to that family.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        The mean of the 3-fold cross-validation scores of the sampled
        classifier on the module-level ``X`` and ``y``. This is the value
        the study is meant to maximize.
    """
    classifier_name = trial.suggest_categorical("classifier", ["LogReg", "RandomForest"])

    # Step 2. Setup values for the hyperparameters:
    if classifier_name == 'LogReg':
        # The range spans twenty orders of magnitude, so sample on a log scale.
        logreg_c = trial.suggest_float("logreg_c", 1e-10, 1e10, log=True)
        classifier_obj = linear_model.LogisticRegression(C=logreg_c)
    else:
        rf_n_estimators = trial.suggest_int("rf_n_estimators", 10, 1000)
        rf_max_depth = trial.suggest_int("rf_max_depth", 2, 32, log=True)
        # Requires `from sklearn import ensemble` at module level; without it
        # this branch raised NameError whenever RandomForest was sampled.
        classifier_obj = ensemble.RandomForestClassifier(
            max_depth=rf_max_depth, n_estimators=rf_n_estimators
        )

    # Step 3: Scoring method:
    # cross_val_score uses the estimator's default scorer; n_jobs=-1 lets the
    # folds be evaluated in parallel.
    scores = model_selection.cross_val_score(classifier_obj, X, y, n_jobs=-1, cv=3)
    return scores.mean()

if __name__ == '__main__':
    # Step 4: Running it
    # Resume the persisted study when it exists; otherwise start a fresh
    # maximizing study so the very first run does not fail on a missing file.
    # NOTE: joblib.load unpickles the file, so only load a study file you trust.
    study_path = 'experiments.pkl'
    try:
        study = joblib.load(study_path)
    except FileNotFoundError:
        study = optuna.create_study(direction='maximize')

    study.optimize(objective, n_trials=3)

    # Persist the updated study so the next invocation continues from here.
    joblib.dump(study, study_path)