Skip to content

Instantly share code, notes, and snippets.

@pabsan-0
Created February 26, 2023 15:26
Show Gist options
  • Save pabsan-0/29b9dccd3638c6e59b8a899ec273d81b to your computer and use it in GitHub Desktop.
Save pabsan-0/29b9dccd3638c6e59b8a899ec273d81b to your computer and use it in GitHub Desktop.
scikit-learn cross-validation example with KFold
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.linear_model import LinearRegression
# K-fold cross-validation example: fit a LinearRegression on each training
# split of the iris data and report the mean squared error on the held-out fold.

# With return_X_y=True and as_frame=True, load_iris returns a
# (features DataFrame, target Series) pair.
X, y = load_iris(return_X_y=True, as_frame=True)

# shuffle=True randomizes fold membership; a fixed random_state keeps the
# folds (and therefore the printed scores) reproducible between runs.
kf = KFold(n_splits=5, shuffle=True, random_state=0)
model = LinearRegression()

score_list = []
for train_ids, test_ids in kf.split(X):
    # kf.split yields positional indices, hence .iloc rather than .loc.
    X_train, X_test = X.iloc[train_ids], X.iloc[test_ids]
    y_train, y_test = y.iloc[train_ids], y.iloc[test_ids]

    # The same estimator object is fitted again on every fold's training data.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # sklearn metrics take (y_true, y_pred) in that order.
    score_list.append(metrics.mean_squared_error(y_test, y_pred))

# Per-fold MSE values, followed by their average across all folds.
print(score_list)
print(np.mean(score_list))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment