# makispl/baseline_model.py

## baseline_model.py
def run_model(df, folds, feats, model):
    """
    Cross-validate ``model`` over the pre-assigned folds of ``df``,
    print the weighted F1 score of every fold and return the mean
    score across all the folds.

    For each fold index in ``range(folds)``, the rows whose ``kfold``
    value differs from the index are used for fitting and the rows whose
    ``kfold`` value equals the index are used for validation.

    Parameters
    ---------
    df : a dataframe object
            Contains the plays; it must provide a ``kfold`` column, a
            ``gm_cluster`` target column and every column named in
            ``feats``
    folds : int
            Number of folds
    feats : a list object
            Contains the features' columns
    model : an estimator object
            Must expose ``fit(X, y)`` and ``predict(X)``; its string
            representation is printed in the summary line

    Returns
    -------
    mean_score : float
            The average weighted F1 score across the folds
            (``np.mean`` of an empty list, i.e. NaN, when ``folds`` is 0)
    """

    scores = []
    for fold in range(folds):

        # rows outside the current fold are the training split
        df_train = df[df.kfold != fold].reset_index(drop=True)

        # rows inside the current fold are the validation split
        df_valid = df[df.kfold == fold].reset_index(drop=True)

        # feature matrices as plain arrays
        x_train = df_train[feats].values
        x_valid = df_valid[feats].values

        # NOTE: the same estimator object is fitted again on every fold;
        # this assumes fit() discards previously learned state -- TODO
        # confirm for estimators configured with warm-start behaviour
        model.fit(x_train, df_train.gm_cluster.values)

        # predict on validation data
        valid_preds = model.predict(x_valid)

        # get f1_weighted score
        f1 = f1_score(df_valid.gm_cluster.values, valid_preds, average='weighted')

        print(f"Fold = {fold}, F1 = {f1}")

        # register the score in the score list
        scores.append(f1)

    # compute the mean once so the printed and returned values are the same object
    mean_score = np.mean(scores)
    print(f"Model {model} \n===================\nMean F1 Score = {mean_score}")
    return mean_score

# Baseline estimator: multinomial logistic regression using the L-BFGS solver.
# NOTE(review): scikit-learn documents n_jobs as parallelising one-vs-rest
# fits -- confirm it has any effect with multi_class='multinomial'.
# NOTE(review): recent scikit-learn releases deprecate the multi_class
# argument -- confirm against the version this project pins.
logres = LogisticRegression(solver="lbfgs", multi_class="multinomial", n_jobs=-1)
	def run_model(df, folds, feats, model):
		"""
		Cross-validate ``model`` over the pre-assigned folds of ``df``,
		print the weighted F1 score of every fold and return the mean
		score across all the folds.

		For each fold index in ``range(folds)``, the rows whose ``kfold``
		value differs from the index are used for fitting and the rows whose
		``kfold`` value equals the index are used for validation.

		Parameters
		---------
		df : a dataframe object
			Contains the plays; it must provide a ``kfold`` column, a
			``gm_cluster`` target column and every column named in
			``feats``
		folds : int
			Number of folds
		feats : a list object
			Contains the features' columns
		model : an estimator object
			Must expose ``fit(X, y)`` and ``predict(X)``; its string
			representation is printed in the summary line

		Returns
		-------
		mean_score : float
			The average weighted F1 score across the folds
			(``np.mean`` of an empty list, i.e. NaN, when ``folds`` is 0)
		"""

		scores = []
		for fold in range(folds):

			# rows outside the current fold are the training split
			df_train = df[df.kfold != fold].reset_index(drop=True)

			# rows inside the current fold are the validation split
			df_valid = df[df.kfold == fold].reset_index(drop=True)

			# feature matrices as plain arrays
			x_train = df_train[feats].values
			x_valid = df_valid[feats].values

			# NOTE: the same estimator object is fitted again on every fold;
			# this assumes fit() discards previously learned state -- TODO
			# confirm for estimators configured with warm-start behaviour
			model.fit(x_train, df_train.gm_cluster.values)

			# predict on validation data
			valid_preds = model.predict(x_valid)

			# get f1_weighted score
			f1 = f1_score(df_valid.gm_cluster.values, valid_preds, average='weighted')

			print(f"Fold = {fold}, F1 = {f1}")

			# register the score in the score list
			scores.append(f1)

		# compute the mean once so the printed and returned values are the same object
		mean_score = np.mean(scores)
		print(f"Model {model} \n===================\nMean F1 Score = {mean_score}")
		return mean_score

	# Baseline estimator: multinomial logistic regression using the L-BFGS solver.
	# NOTE(review): scikit-learn documents n_jobs as parallelising one-vs-rest
	# fits -- confirm it has any effect with multi_class='multinomial'.
	logres = LogisticRegression(
		solver="lbfgs",
		multi_class="multinomial",
		n_jobs=-1,
	)