Skip to content

Instantly share code, notes, and snippets.

@lewtun
Created December 24, 2019 22:47
Show Gist options
  • Save lewtun/dcd49e5eebb3390f41c1a745d2f65699 to your computer and use it in GitHub Desktop.
Save lewtun/dcd49e5eebb3390f41c1a745d2f65699 to your computer and use it in GitHub Desktop.
def cv_model(X, y, features, n_fold=5, random_state=45245, params=None):
    """Evaluate an XGBRegressor by shuffled K-fold cross validation.

    Parameters
    ----------
    X : pandas.DataFrame
        The data to fit. Only the columns listed in `features` are used.
    y : pandas.DataFrame or pandas.Series
        The scalar coupling constants as target variables. Indexed
        positionally (``.iloc``), so it must be row-aligned with `X`.
    features : list
        The list of features to use during training. Must contain the
        ``"type"`` column, because the per-fold score is grouped by
        ``X_valid["type"]`` after `X` has been restricted to `features`.
    n_fold : int, default `5`
        The number of folds to use in cross validation.
    random_state : int, default `45245`
        The seed for the shuffled KFold split.
    params : dict, default `None`
        Keyword arguments forwarded to XGBRegressor. `None` means "use the
        estimator's own defaults".

    Returns
    -------
    results_mean : list
        One entry per fold: the first element returned by
        `group_mean_log_mae` for that fold (presumably the overall score;
        confirm against that helper).
    results_details : list
        One list per fold, built from the second element returned by
        `group_mean_log_mae` (presumably the per-type scores; confirm
        against that helper).
    """
    # The signature advertises params=None, but unpacking None with ** raises
    # TypeError; fall back to an empty dict so the default is actually usable.
    if params is None:
        params = {}

    X = X[features]
    folds = KFold(n_splits=n_fold, shuffle=True, random_state=random_state)
    # A single estimator instance is created here and re-fitted on every fold.
    model = XGBRegressor(**params)

    results_mean = []
    results_details = []
    for train_index, valid_index in folds.split(X):
        X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_valid)

        # The metric is grouped by the "type" column of the validation rows.
        scores = group_mean_log_mae(y_pred, y_valid, X_valid["type"])
        results_mean.append(scores[0])
        results_details.append(list(scores[1]))

    # Summarise the per-fold scores: mean and spread across folds.
    print(
        "After {}-fold CV: Mean: ".format(n_fold),
        np.mean(results_mean),
        "Std.:",
        np.std(results_mean),
    )
    return results_mean, results_details
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment