micahmelling/ultaopt.py Secret

## ultaopt.py
import joblib
import os

from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import cross_val_score
from ultraopt.hdl import layering_config
from ultraopt import fmin
from ultraopt.multi_fidelity import HyperBandIterGenerator

from modeling.config import MODELS_DIRECTORY


def train_model(x_train, y_train, get_pipeline_function, model_name, model, param_space, n_trials, cv_times, scoring):
    """
    Trains a machine learning model, optimizes the hyperparameters with ultraopt, refits the pipeline on the full
    training data using the best configuration found, and saves the serialized pipeline to
    <model_name>/<MODELS_DIRECTORY>/<model_name>.pkl.

    :param x_train: x_train dataframe
    :param y_train: y_train series
    :param get_pipeline_function: callable that takes model to produce a scikit-learn pipeline
    :param model_name: name of the model; used for the log message, the output directory, and the file name
    :param model: instantiated model
    :param param_space: the distribution of hyperparameters to search over
    :param n_trials: number of iterations passed to the ultraopt search
    :param cv_times: number of cross validation folds (passed as cv to cross_val_score)
    :param scoring: scoring method used for cross validation
    :returns: fitted scikit-learn pipeline
    """
    print(f'training {model_name}...')
    pipeline = get_pipeline_function(model)

    def _evaluate(config):
        # set_params is applied to the shared pipeline; cross_val_score then scores that configuration.
        candidate = pipeline.set_params(**layering_config(config))
        scores = cross_val_score(candidate, x_train, y_train, scoring=scoring, cv=cv_times, n_jobs=-1)
        # fmin minimizes the returned value, so the mean score is flipped into a loss.
        return 1 - float(scores.mean())

    # NOTE(review): _evaluate takes no budget argument, so the HyperBand budgets do not appear to change how a
    # configuration is scored - confirm this is intended.
    hb = HyperBandIterGenerator(min_budget=1/4, max_budget=1, eta=2)
    result = fmin(eval_func=_evaluate, config_space=param_space, optimizer="ETPE", n_iterations=n_trials,
                  multi_fidelity_iter_generator=hb)

    # Apply the same layering_config transformation used during the search so the final fit uses exactly the
    # parameters that were evaluated.
    best_params = layering_config(result.best_config)
    pipeline.set_params(**best_params)
    pipeline.fit(x_train, y_train)

    # joblib.dump does not create missing directories, so make sure the target exists first.
    output_directory = os.path.join(model_name, MODELS_DIRECTORY)
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
    joblib.dump(pipeline, os.path.join(output_directory, f'{model_name}.pkl'), compress=3)
    return pipeline
	import joblib
	import os

	from sklearn.calibration import CalibratedClassifierCV
	from sklearn.model_selection import cross_val_score
	from ultraopt.hdl import layering_config
	from ultraopt import fmin
	from ultraopt.multi_fidelity import HyperBandIterGenerator

	from modeling.config import MODELS_DIRECTORY


	def train_model(x_train, y_train, get_pipeline_function, model_name, model, param_space, n_trials, cv_times, scoring):
		"""
		Trains a machine learning model, optimizes the hyperparameters with ultraopt, refits the pipeline on the full
		training data using the best configuration found, and saves the serialized pipeline to
		<model_name>/<MODELS_DIRECTORY>/<model_name>.pkl.

		:param x_train: x_train dataframe
		:param y_train: y_train series
		:param get_pipeline_function: callable that takes model to produce a scikit-learn pipeline
		:param model_name: name of the model; used for the log message, the output directory, and the file name
		:param model: instantiated model
		:param param_space: the distribution of hyperparameters to search over
		:param n_trials: number of iterations passed to the ultraopt search
		:param cv_times: number of cross validation folds (passed as cv to cross_val_score)
		:param scoring: scoring method used for cross validation
		:returns: fitted scikit-learn pipeline
		"""
		print(f'training {model_name}...')
		pipeline = get_pipeline_function(model)

		def _evaluate(config):
			# set_params is applied to the shared pipeline; cross_val_score then scores that configuration.
			candidate = pipeline.set_params(**layering_config(config))
			scores = cross_val_score(candidate, x_train, y_train, scoring=scoring, cv=cv_times, n_jobs=-1)
			# fmin minimizes the returned value, so the mean score is flipped into a loss.
			return 1 - float(scores.mean())

		# NOTE(review): _evaluate takes no budget argument, so the HyperBand budgets do not appear to change how a
		# configuration is scored - confirm this is intended.
		hb = HyperBandIterGenerator(min_budget=1/4, max_budget=1, eta=2)
		result = fmin(
			eval_func=_evaluate,
			config_space=param_space,
			optimizer="ETPE",
			n_iterations=n_trials,
			multi_fidelity_iter_generator=hb,
		)

		# Apply the same layering_config transformation used during the search so the final fit uses exactly the
		# parameters that were evaluated.
		best_params = layering_config(result.best_config)
		pipeline.set_params(**best_params)
		pipeline.fit(x_train, y_train)

		# joblib.dump does not create missing directories, so make sure the target exists first.
		output_directory = os.path.join(model_name, MODELS_DIRECTORY)
		if output_directory:
			os.makedirs(output_directory, exist_ok=True)
		joblib.dump(pipeline, os.path.join(output_directory, f'{model_name}.pkl'), compress=3)
		return pipeline