juangesino/1_How to Build a Custom Estimator for scikit-learn.md

## 1_How to Build a Custom Estimator for scikit-learn.md

      
How to Build a Custom Estimator for scikit-learn


## fit.py
# Build the ensemble with its default hyper-parameters and train it on the
# training split.
# NOTE(review): `re` is also the name of the standard-library regex module;
# importing that module into the same namespace would clash with this variable.
re = ResampledEnsemble()
re.fit(X_train, y_train)

# Predict on the held-out split and build the classification report.
# NOTE(review): the report is neither printed nor stored here — presumably this
# is the last expression of a notebook cell; confirm.
y_pred = re.predict(X_test)
classification_report(y_test, y_pred)


## import.py
from resampled_ensemble import ResampledEnsemble

## load_data.py
# Load the breast-cancer dataset and split it into train/test partitions with
# a fixed seed. The labels are exposed as `data.target`; the returned object
# has no `train` attribute, so the previous `data.train` raised AttributeError.
data = load_breast_cancer(as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0
)

## matrix.py
# Confusion matrix of the fitted ensemble `re` on the test split, with
# normalize=None, drawn on the axes `ax1`.
# NOTE(review): `plt`, `ax1` and `ax2` are not defined in this snippet —
# presumably created beforehand with matplotlib; confirm.
# NOTE(review): three display labels are passed; verify that this matches the
# number of classes actually present in `y_test`.
plot_confusion_matrix(
    re,
    X_test,
    y_test,
    display_labels=[0, 1, 2],
    cmap=plt.cm.GnBu,
    normalize=None,
    ax=ax1,
)

# Same plot on `ax2`, this time with normalize="true"; the value returned by
# the call is kept in `plot_conf`.
plot_conf = plot_confusion_matrix(
    re,
    X_test,
    y_test,
    display_labels=[0, 1, 2],
    cmap=plt.cm.GnBu,
    normalize="true",
    ax=ax2,
)

## pipe.py
# Pre-processing + model pipeline: replace NaNs with the column mean, min-max
# scale the features, then fit the resampled ensemble.
pipe = make_pipeline(
    SimpleImputer(missing_values=np.nan, strategy="mean"),
    MinMaxScaler(),
    ResampledEnsemble(
        # "sqrt" is what "auto" meant for tree classifiers; "auto" itself was
        # deprecated and later removed from scikit-learn, so spell it out.
        max_features="sqrt",
        min_samples_split=0.01,
        min_samples_leaf=0.0001,
        n_estimators=300,
    ),
)

# Search over three evenly spaced integer depths (5, 22, 40). The
# "resampledensemble__" prefix addresses the ensemble step of the pipeline.
grid_params = {
    "resampledensemble__max_depth": np.linspace(5, 40, 3, endpoint=True, dtype=int),
}

# 4-fold cross-validated grid search scored with macro-averaged F1, using all
# available cores and keeping the training scores as well.
grid = GridSearchCV(
    pipe, grid_params, cv=4, return_train_score=True, n_jobs=-1, scoring="f1_macro"
)
grid.fit(X_train, y_train)

# Best cross-validated score and the parameter combination that produced it.
best_score = grid.best_score_
best_params = grid.best_params_

## resampled_ensemble_1.py
class ResampledEnsemble(BaseEstimator):
    """Skeleton of the custom estimator; it accepts no parameters yet."""

    def __init__(self):
        """Create the estimator; there is nothing to initialise at this stage."""

## resampled_ensemble_2.py
def __init__(
    self,
    base_estimator=DecisionTreeClassifier(),
    n_estimators=100,
    max_depth=None,
    max_features=None,
    min_samples_split=2,
    min_samples_leaf=1,
):
    """Store the ensemble's hyper-parameters on the instance.

    Args:
        base_estimator: Estimator stored as ``self.base_estimator``.
        n_estimators: Stored as ``self.n_estimators``.
        max_depth: Stored as ``self.max_depth``.
        max_features: Stored as ``self.max_features``.
        min_samples_split: Stored as ``self.min_samples_split``.
        min_samples_leaf: Stored as ``self.min_samples_leaf``.
    """
    # Prototype estimator and ensemble size.
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators

    # Tree-related settings.
    self.max_depth = max_depth
    self.max_features = max_features
    self.min_samples_leaf = min_samples_leaf
    self.min_samples_split = min_samples_split

    # Mark this object as a classifier.
    self._estimator_type = "classifier"

## resampled_ensemble_3.py
def _generate_estimators(self):
    """Build the named, resampled pipelines that make up the ensemble.

    One pipeline is created for every ``i`` in ``range(self.n_estimators)``:
    a ``RandomUnderSampler`` (with replacement) followed by a clone of
    ``self.base_estimator``. Both are seeded with ``i``, and the clone
    receives this object's ``max_depth``, ``max_features``,
    ``min_samples_split`` and ``min_samples_leaf``.

    Returns:
        list: ``("est_<i>", pipeline)`` tuples, ordered by ``i``.
    """

    def build_member(seed):
        # Fresh copy of the prototype, configured from this object's settings.
        model = clone(self.base_estimator)
        model.random_state = seed
        model.max_depth = self.max_depth
        model.max_features = self.max_features
        model.min_samples_split = self.min_samples_split
        model.min_samples_leaf = self.min_samples_leaf

        # Each member gets its own, differently seeded sampler.
        sampler = RandomUnderSampler(random_state=seed, replacement=True)
        return f"est_{seed}", make_imb_pipeline(sampler, model)

    return [build_member(seed) for seed in range(self.n_estimators)]

## resampled_ensemble_4.py
def __init__(
    self,
    base_estimator=DecisionTreeClassifier(),
    n_estimators=100,
    max_depth=None,
    max_features=None,
    min_samples_split=2,
    min_samples_leaf=1,
):
    """Store the hyper-parameters and pre-build the ensemble members.

    Every argument is stored on the instance under its own name; afterwards
    ``self.estimators`` is filled with the result of
    ``self._generate_estimators()``.
    """
    # Prototype estimator and ensemble size.
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators

    # Tree-related settings.
    self.max_depth = max_depth
    self.max_features = max_features
    self.min_samples_leaf = min_samples_leaf
    self.min_samples_split = min_samples_split

    # Mark this object as a classifier.
    self._estimator_type = "classifier"

    # Must run last: member generation reads the attributes set above.
    members = self._generate_estimators()
    self.estimators = members


## resampled_ensemble_5.py
def __init__(
    self,
    base_estimator=DecisionTreeClassifier(),
    n_estimators=100,
    max_depth=None,
    max_features=None,
    min_samples_split=2,
    min_samples_leaf=1,
):
    """Store the hyper-parameters and assemble the soft-voting ensemble.

    Every argument is stored on the instance under its own name. The members
    returned by ``self._generate_estimators()`` are kept in
    ``self.estimators`` and wrapped in a ``VotingClassifier`` with
    ``voting="soft"``, stored as ``self.estimator``.
    """
    # Prototype estimator and ensemble size.
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators

    # Tree-related settings.
    self.max_depth = max_depth
    self.max_features = max_features
    self.min_samples_leaf = min_samples_leaf
    self.min_samples_split = min_samples_split

    # Mark this object as a classifier.
    self._estimator_type = "classifier"

    # Must run last: member generation reads the attributes set above.
    members = self._generate_estimators()
    self.estimators = members
    self.estimator = VotingClassifier(members, voting="soft")


## resampled_ensemble_6.py
def fit(self, X, y, sample_weight=None):
    """Fit the wrapped voting ensemble on the training data.

    Args:
        X: Training features, forwarded unchanged to ``self.estimator.fit``.
        y: Training targets, forwarded unchanged.
        sample_weight: Optional sample weights, forwarded by keyword.

    Returns:
        This estimator (``self``), following the scikit-learn ``fit``
        convention so that calls can be chained on the wrapper itself.
    """
    # Pass by keyword: recent scikit-learn releases no longer accept
    # ``sample_weight`` positionally on ``VotingClassifier.fit``.
    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self

def predict(self, X):
    """Return the predictions of the wrapped ensemble for ``X``.

    Args:
        X: Samples to predict, forwarded unchanged to
            ``self.estimator.predict``.

    Returns:
        Whatever ``self.estimator.predict(X)`` returns.
    """
    ensemble = self.estimator
    return ensemble.predict(X)

@property
def classes_(self):
    """Class labels of the wrapped ensemble.

    Exposed as a property because scikit-learn utilities read ``classes_``
    as an attribute, not as a method call.

    Returns:
        ``self.estimator.classes_`` when ``self.estimator`` is truthy,
        otherwise ``None``.
    """
    ensemble = self.estimator
    if not ensemble:
        return None
    return ensemble.classes_

## resampled_ensemble_7.py
def set_params(self, **params):
    """Update parameters and rebuild the ensemble so they take effect.

    Keys that name an existing attribute are set on the instance. Any other
    key is stored in the ``self.kwargs`` dict, which is created on first use
    (nothing else initialises it, so writing to it used to raise
    ``AttributeError``).

    Args:
        **params: Parameter names mapped to their new values.

    Returns:
        This estimator (``self``). With no parameters, nothing is rebuilt.
    """
    if not params:
        return self

    for key, value in params.items():
        if hasattr(self, key):
            setattr(self, key, value)
            continue

        # Unknown name: stash it, creating the container if it is missing.
        extras = getattr(self, "kwargs", None)
        if extras is None:
            extras = self.kwargs = {}
        extras[key] = value

    # The members are built from the attributes above, so regenerate them and
    # the voting classifier that wraps them.
    self.estimators = self._generate_estimators()
    self.estimator = VotingClassifier(self.estimators, voting="soft")
    return self

## roc.py
# Plot the ROC curve of the fitted ensemble `re` on the test split, drawing on
# the axes `ax` (not defined in this snippet).
# NOTE(review): the trailing semicolon is presumably there to suppress the
# notebook cell output — confirm before removing it.
plot_roc_curve(re, X_test, y_test, ax=ax);
	# Build the ensemble with its default hyper-parameters and train it on the
	# training split.
	# NOTE(review): `re` is also the name of the standard-library regex module;
	# importing that module into the same namespace would clash with this variable.
	re = ResampledEnsemble()
	re.fit(X_train, y_train)

	# Predict on the held-out split and build the classification report.
	# NOTE(review): the report is neither printed nor stored here — presumably
	# this is the last expression of a notebook cell; confirm.
	y_pred = re.predict(X_test)
	classification_report(y_test, y_pred)
	# Load the breast-cancer dataset and split it into train/test partitions
	# with a fixed seed. The labels are exposed as `data.target`; the returned
	# object has no `train` attribute, so `data.train` raised AttributeError.
	data = load_breast_cancer(as_frame=True)
	X_train, X_test, y_train, y_test = train_test_split(
		data.data, data.target, random_state=0
	)
	# Confusion matrix of the fitted ensemble on the test split with
	# normalize=None, drawn on `ax1`.
	plot_confusion_matrix(
		re,
		X_test,
		y_test,
		display_labels=[0, 1, 2],
		cmap=plt.cm.GnBu,
		normalize=None,
		ax=ax1,
	)

	# Same plot on `ax2` with normalize="true"; the returned value is kept in
	# `plot_conf`.
	plot_conf = plot_confusion_matrix(
		re,
		X_test,
		y_test,
		display_labels=[0, 1, 2],
		cmap=plt.cm.GnBu,
		normalize="true",
		ax=ax2,
	)
	# Pre-processing + model pipeline: replace NaNs with the column mean,
	# min-max scale the features, then fit the resampled ensemble.
	pipe = make_pipeline(
		SimpleImputer(missing_values=np.nan, strategy="mean"),
		MinMaxScaler(),
		ResampledEnsemble(
			# "sqrt" is what "auto" meant for tree classifiers; "auto" itself
			# was deprecated and later removed from scikit-learn.
			max_features="sqrt",
			min_samples_split=0.01,
			min_samples_leaf=0.0001,
			n_estimators=300,
		),
	)

	# Search over three evenly spaced integer depths (5, 22, 40). The
	# "resampledensemble__" prefix addresses the ensemble step of the pipeline.
	grid_params = {
		"resampledensemble__max_depth": np.linspace(5, 40, 3, endpoint=True, dtype=int),
	}

	# 4-fold cross-validated grid search scored with macro-averaged F1.
	grid = GridSearchCV(
		pipe, grid_params, cv=4, return_train_score=True, n_jobs=-1, scoring="f1_macro"
	)
	grid.fit(X_train, y_train)

	# Best cross-validated score and the parameters that produced it.
	best_score = grid.best_score_
	best_params = grid.best_params_
	class ResampledEnsemble(BaseEstimator):
		"""Soft-voting ensemble of estimators trained on under-sampled data.

		Each member is a pipeline built by ``make_imb_pipeline`` from a
		``RandomUnderSampler`` and a clone of ``base_estimator``; the members
		are combined by a ``VotingClassifier`` with ``voting="soft"``.
		"""

		def __init__(
			self,
			base_estimator=DecisionTreeClassifier(),
			n_estimators=100,
			max_depth=None,
			max_features=None,
			min_samples_split=2,
			min_samples_leaf=1,
		):
			"""Store the hyper-parameters and build the voting ensemble.

			Args:
				base_estimator: Prototype estimator, cloned once per member.
				n_estimators: Number of ensemble members to build.
				max_depth: Copied onto every cloned estimator.
				max_features: Copied onto every cloned estimator.
				min_samples_split: Copied onto every cloned estimator.
				min_samples_leaf: Copied onto every cloned estimator.
			"""
			self._estimator_type = "classifier"
			self.base_estimator = base_estimator
			self.n_estimators = n_estimators
			self.max_depth = max_depth
			self.max_features = max_features
			self.min_samples_split = min_samples_split
			self.min_samples_leaf = min_samples_leaf

			# fit/predict/classes_ all read self.estimator, so it has to exist
			# as soon as the object is constructed.
			self._build_ensemble()

		def _generate_estimators(self):
			"""Return the ``("est_<i>", pipeline)`` members of the ensemble.

			One pipeline is created for every ``i`` in
			``range(self.n_estimators)``; the sampler and the cloned estimator
			are both seeded with ``i``.
			"""
			estimators = []

			for i in range(self.n_estimators):
				est = clone(self.base_estimator)

				est.random_state = i

				est.max_depth = self.max_depth
				est.max_features = self.max_features
				est.min_samples_split = self.min_samples_split
				est.min_samples_leaf = self.min_samples_leaf

				pipe = make_imb_pipeline(
					RandomUnderSampler(random_state=i, replacement=True),
					est,
				)
				estimators.append((f"est_{i}", pipe))

			return estimators

		def _build_ensemble(self):
			"""(Re)create ``self.estimators`` and the soft-voting ``self.estimator``."""
			self.estimators = self._generate_estimators()
			self.estimator = VotingClassifier(self.estimators, voting="soft")

		def fit(self, X, y, sample_weight=None):
			"""Fit the wrapped voting ensemble and return ``self``.

			``sample_weight`` is forwarded by keyword because recent
			scikit-learn releases no longer accept it positionally.
			"""
			self.estimator.fit(X, y, sample_weight=sample_weight)
			return self

		def predict(self, X):
			"""Return ``self.estimator.predict(X)``."""
			return self.estimator.predict(X)

		@property
		def classes_(self):
			"""Class labels of the wrapped ensemble, or ``None`` if it is falsy.

			A property, because scikit-learn utilities read ``classes_`` as an
			attribute rather than calling it.
			"""
			if self.estimator:
				return self.estimator.classes_
			return None

		def set_params(self, **params):
			"""Update parameters and rebuild the ensemble so they take effect.

			Keys naming an existing attribute are set on the instance; any
			other key is stored in ``self.kwargs``, created on first use.

			Returns:
				This estimator (``self``). With no parameters, nothing is rebuilt.
			"""
			if not params:
				return self

			for key, value in params.items():
				if hasattr(self, key):
					setattr(self, key, value)
				else:
					# Nothing else initialises self.kwargs; create it lazily
					# instead of failing with AttributeError.
					extras = getattr(self, "kwargs", None)
					if extras is None:
						extras = self.kwargs = {}
					extras[key] = value

			self._build_ensemble()
			return self