# jeanmidevacc/surprise_search.py

## surprise_search.py
from time import time
from hyperopt import fmin, tpe, hp, anneal, Trials
import mlflow
from sklearn.metrics import mean_squared_error
import surprise

def evaluate_model(model, dfp_ratings_test):
    """Return the RMSE of ``model`` on the held-out ratings.

    A prediction is computed for every row of ``dfp_ratings_test`` by calling
    ``compute_ranking(model, userid, contentid)`` with both ids cast to
    ``str``; the predictions are then compared with the ``rating`` column.
    ``compute_ranking`` is not defined in this block and must be available in
    the enclosing scope.

    Args:
        model: Fitted model, passed through unchanged to ``compute_ranking``.
        dfp_ratings_test: Pandas-style frame with ``userid``, ``contentid``
            and ``rating`` columns. It is copied, so the caller's frame is
            not modified.

    Returns:
        Root mean squared error between the true and predicted ratings.
    """
    dfp_evaluation = dfp_ratings_test.copy()
    dfp_evaluation["rating_predicted"] = dfp_evaluation.apply(
        lambda row: compute_ranking(model, str(row["userid"]), str(row["contentid"])),
        axis=1,
    )
    # Take the square root explicitly instead of passing ``squared=False``:
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
    mse = mean_squared_error(
        dfp_evaluation["rating"].tolist(),
        dfp_evaluation["rating_predicted"].tolist(),
    )
    return mse ** 0.5

def mlflow_logging(rmse, training_time, evaluation_time, model_name, runid, type_training, params, log_model=log_model):
    """Record one trial as a nested MLflow run.

    Args:
        rmse: Logged as the ``rmse`` metric.
        training_time: Logged as the ``training_time`` metric.
        evaluation_time: Logged as the ``evaluation_time`` metric.
        model_name: Stored as the ``model`` tag and as the ``model`` param.
        runid: Stored as the ``runid`` tag.
        type_training: Stored as the ``type_training`` tag.
        params: Mapping of hyperparameters to log. It is not modified.
        log_model: Accepted for call compatibility; not used by this function.
    """
    metrics = {
        "rmse": rmse,
        "training_time": training_time,
        "evaluation_time": evaluation_time,
    }
    tags = {"model": model_name, "runid": runid, "type_training": type_training}
    # Log a merged copy rather than writing the "model" key into the caller's
    # dict, so the argument is left exactly as it was passed in.
    logged_params = {**params, "model": model_name}

    with mlflow.start_run(nested=True):
        mlflow.set_tags(tags)
        mlflow.log_params(logged_params)
        mlflow.log_metrics(metrics)

def train_and_evaluate(params, model_name=model_name, trainset=trainset, data_train=data_train, dfp_ratings_test=dfp_ratings_test):
    """Fit a Surprise NMF model with ``params`` and return its test RMSE.

    Used as the objective function of the hyperopt search. The trial is also
    logged through ``mlflow_logging``.

    Args:
        params: Mapping with ``n_factors``, ``n_epochs``, ``biased``,
            ``reg_pu`` and ``reg_qi``. The first two are cast to ``int``.
        model_name: Name forwarded to ``mlflow_logging``.
        trainset: Accepted but not used by the body.
        data_train: Accepted but not used by the body.
        dfp_ratings_test: Ratings frame handed to ``evaluate_model``.

    Returns:
        The RMSE from ``evaluate_model``, or ``100`` when an exception is
        raised before the RMSE has been computed. Exceptions are printed,
        never propagated.

    NOTE(review): the model is fitted on ``trainset_surprise``, and ``runid``
    and ``type_training`` are also read from the enclosing scope rather than
    from the arguments. Confirm whether ``trainset`` was meant to be used.
    """
    rmse = 100
    try:
        started_training = time()
        model_params = {}
        for key in ("n_factors", "n_epochs"):
            model_params[key] = int(params[key])
        for key in ("biased", "reg_pu", "reg_qi"):
            model_params[key] = params[key]

        nmf_cls = surprise.prediction_algorithms.matrix_factorization.NMF
        model = nmf_cls(**model_params).fit(trainset_surprise)
        training_time = time() - started_training

        started_evaluation = time()
        rmse = evaluate_model(model, dfp_ratings_test)
        evaluation_time = time() - started_evaluation

        mlflow_logging(rmse, training_time, evaluation_time, model_name, runid, type_training, model_params)
    except Exception as error:
        # Best effort: a failed trial is reported and scored with the current
        # value of ``rmse`` so the search can carry on.
        print(error)
    return rmse

# Search configuration.
max_evals = 100
model_name = "NMF"

# Hyperparameter search space; the dict keys are the names read by
# train_and_evaluate, the first argument of each hp.* call is the hyperopt label.
space = {
    "n_factors": hp.randint("k", 1, 100),
    "n_epochs": hp.randint("n_epochs", 1, 100),
    "biased": hp.choice("biased", [True, False]),
    "reg_pu": hp.uniform("reg_u", 0.001, 100),
    "reg_qi": hp.uniform("reg_i", 0.001, 100),
}

# Run the search; `trials` keeps the history of evaluated points.
trials = Trials()
best = fmin(
    fn=train_and_evaluate,  # function to optimize
    space=space,
    algo=tpe.suggest,  # optimization algorithm; hyperopt selects its parameters automatically
    max_evals=max_evals,  # maximum number of iterations
    trials=trials,
)
	from time import time
	from hyperopt import fmin, tpe, hp, anneal, Trials
	import mlflow
	from sklearn.metrics import mean_squared_error
	import surprise

	def evaluate_model(model, dfp_ratings_test):
	    """Return the RMSE of ``model`` on the held-out ratings.

	    A prediction is computed for every row of ``dfp_ratings_test`` by calling
	    ``compute_ranking(model, userid, contentid)`` with both ids cast to
	    ``str``; the predictions are then compared with the ``rating`` column.
	    ``compute_ranking`` is not defined in this block and must be available in
	    the enclosing scope.

	    Args:
	        model: Fitted model, passed through unchanged to ``compute_ranking``.
	        dfp_ratings_test: Pandas-style frame with ``userid``, ``contentid``
	            and ``rating`` columns. It is copied, so the caller's frame is
	            not modified.

	    Returns:
	        Root mean squared error between the true and predicted ratings.
	    """
	    dfp_evaluation = dfp_ratings_test.copy()
	    dfp_evaluation["rating_predicted"] = dfp_evaluation.apply(
	        lambda row: compute_ranking(model, str(row["userid"]), str(row["contentid"])),
	        axis=1,
	    )
	    # Take the square root explicitly instead of passing ``squared=False``:
	    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
	    mse = mean_squared_error(
	        dfp_evaluation["rating"].tolist(),
	        dfp_evaluation["rating_predicted"].tolist(),
	    )
	    return mse ** 0.5

	def mlflow_logging(rmse, training_time, evaluation_time, model_name, runid, type_training, params, log_model=log_model):
	    """Record one trial as a nested MLflow run.

	    Args:
	        rmse: Logged as the ``rmse`` metric.
	        training_time: Logged as the ``training_time`` metric.
	        evaluation_time: Logged as the ``evaluation_time`` metric.
	        model_name: Stored as the ``model`` tag and as the ``model`` param.
	        runid: Stored as the ``runid`` tag.
	        type_training: Stored as the ``type_training`` tag.
	        params: Mapping of hyperparameters to log. It is not modified.
	        log_model: Accepted for call compatibility; not used by this function.
	    """
	    metrics = {
	        "rmse": rmse,
	        "training_time": training_time,
	        "evaluation_time": evaluation_time,
	    }
	    tags = {"model": model_name, "runid": runid, "type_training": type_training}
	    # Log a merged copy rather than writing the "model" key into the caller's
	    # dict, so the argument is left exactly as it was passed in.
	    logged_params = {**params, "model": model_name}

	    with mlflow.start_run(nested=True):
	        mlflow.set_tags(tags)
	        mlflow.log_params(logged_params)
	        mlflow.log_metrics(metrics)

	def train_and_evaluate(params, model_name=model_name, trainset=trainset, data_train=data_train, dfp_ratings_test=dfp_ratings_test):
	    """Fit a Surprise NMF model with ``params`` and return its test RMSE.

	    Used as the objective function of the hyperopt search. The trial is also
	    logged through ``mlflow_logging``.

	    Args:
	        params: Mapping with ``n_factors``, ``n_epochs``, ``biased``,
	            ``reg_pu`` and ``reg_qi``. The first two are cast to ``int``.
	        model_name: Name forwarded to ``mlflow_logging``.
	        trainset: Accepted but not used by the body.
	        data_train: Accepted but not used by the body.
	        dfp_ratings_test: Ratings frame handed to ``evaluate_model``.

	    Returns:
	        The RMSE from ``evaluate_model``, or ``100`` when an exception is
	        raised before the RMSE has been computed. Exceptions are printed,
	        never propagated.

	    NOTE(review): the model is fitted on ``trainset_surprise``, and ``runid``
	    and ``type_training`` are also read from the enclosing scope rather than
	    from the arguments. Confirm whether ``trainset`` was meant to be used.
	    """
	    rmse = 100
	    try:
	        tic_training = time()
	        model_params = {
	            "n_factors": int(params["n_factors"]),
	            "n_epochs": int(params["n_epochs"]),
	            "biased": params["biased"],
	            "reg_pu": params["reg_pu"],
	            "reg_qi": params["reg_qi"],
	        }

	        model = surprise.prediction_algorithms.matrix_factorization.NMF(**model_params)
	        model = model.fit(trainset_surprise)
	        training_time = time() - tic_training

	        tic_evaluation = time()
	        rmse = evaluate_model(model, dfp_ratings_test)
	        evaluation_time = time() - tic_evaluation
	        mlflow_logging(rmse, training_time, evaluation_time, model_name, runid, type_training, model_params)
	    except Exception as e:
	        # Best effort: a failed trial is reported and scored with the current
	        # value of ``rmse`` so the search can carry on.
	        print(e)
	    return rmse

	# Search configuration.
	max_evals = 100
	model_name = "NMF"

	# Hyperparameter search space; the dict keys are the names read by
	# train_and_evaluate, the first argument of each hp.* call is the hyperopt label.
	space = {
	    "n_factors": hp.randint("k", 1, 100),
	    "n_epochs": hp.randint("n_epochs", 1, 100),
	    "biased": hp.choice("biased", [True, False]),
	    "reg_pu": hp.uniform("reg_u", 0.001, 100),
	    "reg_qi": hp.uniform("reg_i", 0.001, 100),
	}

	# Run the search; `trials` keeps the history of evaluated points.
	trials = Trials()
	best = fmin(
	    fn=train_and_evaluate,  # function to optimize
	    space=space,
	    algo=tpe.suggest,  # optimization algorithm; hyperopt selects its parameters automatically
	    max_evals=max_evals,  # maximum number of iterations
	    trials=trials,
	)