Skip to content

Instantly share code, notes, and snippets.

@ychennay
Last active May 19, 2019 18:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ychennay/14ed90107c90f548155a9fa49b1a15d8 to your computer and use it in GitHub Desktop.
Save ychennay/14ed90107c90f548155a9fa49b1a15d8 to your computer and use it in GitHub Desktop.
from typing import List
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
def bootstrap_fit(X: np.ndarray, y: np.ndarray, model_class, B=200, **model_params) -> List[BaseEstimator]:
    """
    Fit and return a list of B models, each trained on a bootstrap resample of (X, y).

    Each resample has the same number of rows as the input and is drawn with
    replacement. Row indices come from NumPy's global random state, so calling
    np.random.seed() beforehand makes the resamples reproducible.

    :param X: features to fit the models on; a 1-D array is treated as a single feature column
    :param y: target aligned row-for-row with X (continuous for regression)
    :param model_class: any sklearn supervised learning class (or callable returning an estimator)
    :param B: number of bootstrapped models to fit
    :param model_params: additional sklearn model parameters, used when instantiating model_class
    :return: a list of B fitted sklearn models available to call predict() upon
    :raises TypeError: if model_class(**model_params) does not produce a BaseEstimator
    :raises ValueError: if X and y do not have the same number of rows
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim == 1:
        # a flat feature vector is a single feature column, as estimators expect 2-D input
        X = X.reshape(-1, 1)
    if y.ndim == 2 and y.shape[1] == 1:
        # a column-vector target is flattened so estimators receive a 1-D y
        y = y.ravel()

    n_samples = X.shape[0]
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X and y must have the same number of rows, got {n_samples} and {y.shape[0]}"
        )

    bootstrapped_models = []  # collects the fitted models
    for _ in range(B):  # bootstrap sample and fit model B times
        # Validate the instance that is actually fitted: building it with model_params
        # (rather than a bare model_class()) supports estimators with required arguments.
        bootstrap_model = model_class(**model_params)
        if not isinstance(bootstrap_model, BaseEstimator):
            raise TypeError(
                "model_class must produce an sklearn BaseEstimator, "
                f"got {type(bootstrap_model).__name__}"
            )
        # Sample row indices with replacement and index X and y with the same draw.
        # This keeps rows aligned without stacking X and y into one array, which
        # would coerce dtypes and mis-split a multi-column y.
        sampled_rows = np.random.choice(n_samples, size=n_samples, replace=True)
        # fit model and add it to the collection of bootstrapped models
        bootstrap_model.fit(X[sampled_rows], y[sampled_rows])
        bootstrapped_models.append(bootstrap_model)
    return bootstrapped_models
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment