@havron · Last active November 10, 2018
A simpler K-fold cross-validation helper than what I could find on the web.
from sklearn.base import clone
from sklearn.model_selection import KFold


def cross_validate(features, labels, nsplits, model):
    '''Return a tuple of (scores : list, average_score : float) over K folds.

    Arguments:
    features : pandas.DataFrame -- preprocessed training data features.
    labels : pandas.Series -- labels for the training data.
    nsplits : int -- the number of folds to use for cross validation.
    model -- the model to cross-validate; assumes fit() and score() methods,
        akin to the sklearn estimator API.
    '''
    kf = KFold(n_splits=nsplits)
    scores = []
    for train_index, test_index in kf.split(features):
        # KFold yields positional indices, so use iloc rather than loc;
        # loc silently misbehaves if the index is not a default RangeIndex.
        X_train, X_test = features.iloc[train_index], features.iloc[test_index]
        y_train, y_test = labels.iloc[train_index], labels.iloc[test_index]
        fold_model = clone(model)  # fresh, unfitted copy of the model per fold
        fold_model.fit(X_train, y_train)
        scores.append(fold_model.score(X_test, y_test))
    return scores, sum(scores) / float(len(scores))
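
A minimal usage sketch, for reference. The iris dataset and LogisticRegression are illustrative choices, not part of the gist; any sklearn-style estimator with fit() and score() works.

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

data = load_iris()
features = pd.DataFrame(data.data, columns=data.feature_names)
labels = pd.Series(data.target)

# 5-fold CV; returns the per-fold scores and their mean.
scores, avg = cross_validate(features, labels, nsplits=5,
                             model=LogisticRegression(max_iter=1000))
print(scores, avg)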