Created
December 24, 2019 22:47
-
-
Save lewtun/dcd49e5eebb3390f41c1a745d2f65699 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cv_model(X, y, features, n_fold=5, random_state=45245, params=None):
    """Evaluate an XGBRegressor with shuffled K-fold cross validation.

    Parameters
    ----------
    X : pandas.DataFrame
        The data to fit.
    y : pandas.DataFrame or pandas.Series
        The scalar coupling constants as target variables.
    features : list
        The list of features to use during training. It must include
        ``"type"``, because that column is read from the validation split
        and passed to ``group_mean_log_mae``.
    n_fold : int, default `5`
        The number of folds to use in cross validation.
    random_state : int, default `45245`
        The seed for the KFold split.
    params : dict, default `None`
        The parameter dictionary for XGBRegressor. ``None`` (or an empty
        dict) builds the regressor with its default parameters.

    Returns
    -------
    results_mean : list
        One entry per fold: the first element of the value returned by
        ``group_mean_log_mae`` for that fold.
    results_details : list
        One list per fold, built from the second element of the value
        returned by ``group_mean_log_mae`` for that fold.
    """
    X = X[features]
    folds = KFold(n_splits=n_fold, shuffle=True, random_state=random_state)
    # ``**None`` raises TypeError, so fall back to an empty mapping when the
    # caller relies on the default.
    model = XGBRegressor(**(params or {}))

    results_mean = []
    results_details = []

    for train_index, valid_index in folds.split(X):
        X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]

        # The same estimator object is refitted on every fold.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_valid)

        scores = group_mean_log_mae(y_pred, y_valid, X_valid["type"])
        results_mean.append(scores[0])
        results_details.append(list(scores[1]))

    print(
        "After {}-fold CV: Mean: ".format(n_fold),
        np.mean(results_mean),
        "Std.:",
        np.std(results_mean),
    )
    return results_mean, results_details
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment