Created
March 28, 2022 14:55
-
-
Save krsnewwave/8af0bf189f240c24e828de6d0386be9a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def factorize_optimize(train, test, eval_train, sp_item_feats, params: Dict):
    """Search the embedding size with Optuna, then retrain the best model.

    Runs a (nested, MLflow-tracked) Optuna study maximizing test
    precision@k, retrains one final model with the best ``n_components``,
    and returns the learned representations plus metrics.

    Returns a dict with keys: user_factors, item_factors, user_biases,
    item_biases, model_metrics, embedding_size.
    """
    k = params["k"]
    seed = params["random_seed"]
    n_epochs = params["epochs"]
    loss_name = params["loss"]

    study = optuna.create_study(study_name="optimize warp", direction="maximize")
    objective = partial(optuna_objective, train, test,
                        eval_train, sp_item_feats, params)

    # MLflow callback for tracking; trials are recorded as nested runs.
    tracking_cb = MLflowCallback(
        tracking_uri=mlflow.get_tracking_uri(),
        metric_name=f"test_precision_at_{k}",
        nest_trials=True,
    )

    log = logging.getLogger(__name__)
    log.info("Optimizing model hyperparams")
    # increase trials for better success (>100)
    study.optimize(objective, n_trials=10, callbacks=[tracking_cb])

    # Retrain once using the best hyperparameters found by the study.
    log.info(f"Training best model (params: {study.best_params})")
    best_components = study.best_params["n_components"]
    mlflow.log_param("n_components", best_components)
    model, test_prec, train_prec = train_model(
        train, test, eval_train, sp_item_feats,
        seed, n_epochs, k, best_components, loss_name)

    metrics = {
        f"train_precision_at_{k}": {"value": train_prec, "step": 0},
        f"test_precision_at_{k}": {"value": test_prec, "step": 0},
    }

    item_biases, item_factors = model.get_item_representations(
        features=sp_item_feats)
    user_biases, user_factors = model.get_user_representations()

    return {
        "user_factors": user_factors,
        "item_factors": item_factors,
        "user_biases": user_biases,
        "item_biases": item_biases,
        "model_metrics": metrics,
        "embedding_size": best_components,
    }
def train_model(train, test, eval_train, sp_item_feats,
                random_seed, epochs, k, n_components, loss):
    """Fit a LightFM model and report mean precision@k on train and test.

    The model is fit one epoch at a time via ``fit_partial``. Returns a
    tuple ``(model, test_precision, train_precision)`` where the
    precisions are means over users.
    """
    model = LightFM(no_components=n_components,
                    loss=loss, random_state=random_seed)

    for _ in range(epochs):
        model.fit_partial(train, item_features=sp_item_feats,
                          num_threads=2, epochs=1)

    # Known train interactions are excluded when scoring the test set.
    test_prec = np.mean(precision_at_k(
        model, test, train_interactions=train, k=k,
        item_features=sp_item_feats))
    train_prec = np.mean(precision_at_k(
        model, eval_train, train_interactions=None, k=k,
        item_features=sp_item_feats))

    log = logging.getLogger(__name__)
    log.info(f"Train: {train_prec}, Test: {test_prec}")
    return model, test_prec, train_prec
def optuna_objective(train, test, eval_train, sp_item_feats, params: Dict,
                     trial: optuna.trial):
    """Optuna objective: sample an embedding size, return test precision@k.

    The ``trial`` parameter comes last so the data arguments can be
    pre-bound with ``functools.partial`` before handing the callable to
    ``study.optimize``.
    """
    k = params["k"]
    seed = params["random_seed"]
    n_epochs = params["epochs"]
    loss_name = params["loss"]

    # Search space: embedding dimensionality.
    n_components = trial.suggest_int("n_components", 10, 80)

    _, test_prec, _ = train_model(train, test, eval_train, sp_item_feats,
                                  seed, n_epochs, k, n_components, loss_name)
    return test_prec
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment