Coldsp33d/selective_handler_ohe.py

## selective_handler_ohe.py
    import numpy as np
    from sklearn.preprocessing import OneHotEncoder

    class SelectiveHandlerOHE(OneHotEncoder):
        """One-hot encoder that rejects unknown categories in selected columns.

        The parent ``OneHotEncoder`` is always created with
        ``handle_unknown='ignore'``.  In addition, :meth:`transform` raises a
        ``ValueError`` when a column named in ``raise_error_cols`` holds a value
        that is absent from the categories learned for that column.

        Parameters
        ----------
        *args
            Positional arguments forwarded unchanged to ``OneHotEncoder``.
        raise_error_cols : iterable of column labels, optional
            Labels of the columns in which unknown categories are an error.
            ``None`` (the default) or an empty iterable disables the check.
        **kwargs
            Keyword arguments forwarded to ``OneHotEncoder``.  A caller-supplied
            ``handle_unknown`` is overwritten with ``'ignore'``.

        Attributes
        ----------
        raise_error_cols : list
            Private copy of the labels given at construction time.
        columns
            ``X.columns`` of the data last passed to :meth:`check_cols`, or
            ``None`` when that data had no ``columns`` attribute.

        Notes
        -----
        NOTE(review): scikit-learn's parameter introspection (``get_params`` /
        ``clone``) is documented as not supporting ``*args`` in ``__init__`` --
        confirm before cloning this estimator or using it in a grid search.
        """

        def __init__(self, *args, raise_error_cols=None, **kwargs):
            # The selective check in `transform` replaces the parent's global
            # one, so the parent must never raise on unknown values itself.
            kwargs['handle_unknown'] = 'ignore'
            # Copy into a fresh list so later changes to the caller's object
            # cannot alter this encoder; any iterable (list, tuple, Index) works.
            self.raise_error_cols = (
                [] if raise_error_cols is None else list(raise_error_cols))
            super().__init__(*args, **kwargs)

        def check_cols(self, X):
            """Validate ``raise_error_cols`` against ``X`` and record its columns.

            Parameters
            ----------
            X : object
                Training data.  It must expose a ``columns`` attribute when
                ``raise_error_cols`` is non-empty.

            Raises
            ------
            ValueError
                If ``raise_error_cols`` is non-empty and ``X`` has no
                ``columns`` attribute, or if any requested label is missing
                from ``X.columns``.
            """
            columns = getattr(X, 'columns', None)

            if self.raise_error_cols:
                if columns is None:
                    msg = ("`raise_error_cols` was given but `X` has no "
                           "`columns` attribute to look the names up in")
                    raise ValueError(msg)

                if any(c not in columns for c in self.raise_error_cols):
                    msg = ("One or more column names are incorrect. "
                           "Please check the column names passed "
                           "to the `raise_error_cols` argument")
                    raise ValueError(msg)

            self.columns = columns

        def fit(self, X, y=None):
            """Check the configured columns, then fit the parent encoder.

            ``y`` is not used by this class; it is accepted and passed through
            so that ``fit(X, y)`` calls work.  Returns whatever the parent's
            ``fit`` returns.
            """
            self.check_cols(X)
            return super().fit(X, y)

        def transform(self, X):
            """Encode ``X``, raising on unknown values in the selected columns.

            Each column in ``raise_error_cols`` is located by the position its
            label had in the data given to :meth:`fit`, and its values are
            compared with ``categories_`` at that position.  The check is
            skipped when ``X`` converts to an array with fewer than two
            dimensions.

            Raises
            ------
            ValueError
                If a selected column contains a value that is not among the
                fitted categories for that column.
            """
            X_ = np.asarray(X)  # read-only use, so no copy is needed
            if X_.ndim > 1:
                for c in self.raise_error_cols:
                    idx = self.columns.get_loc(c)
                    values = X_[:, idx]
                    known = self.categories_[idx]

                    if not np.isin(values, known).all():
                        # str() each item so non-string categories (ints,
                        # floats, ...) can be joined into the message.
                        cats = ','.join(
                            str(v) for v in np.setdiff1d(values, known))
                        msg = ("Found unknown categories {0} in column {1}"
                               " during transform".format(cats, c))
                        raise ValueError(msg)

            return super().transform(X)

        def fit_transform(self, X, y=None):
            """Check the configured columns, then fit and encode ``X``.

            ``y`` is not used by this class; it is accepted and passed through
            so that ``fit_transform(X, y)`` calls work.  Returns whatever the
            parent's ``fit_transform`` returns.
            """
            self.check_cols(X)
            return super().fit_transform(X, y)
	from sklearn.preprocessing import OneHotEncoder

	class SelectiveHandlerOHE(OneHotEncoder):
		"""One-hot encoder that rejects unknown categories in selected columns.

		The parent ``OneHotEncoder`` is always created with
		``handle_unknown='ignore'``.  In addition, :meth:`transform` raises a
		``ValueError`` when a column named in ``raise_error_cols`` holds a
		value that is absent from the categories learned for that column.

		Parameters
		----------
		*args
			Positional arguments forwarded unchanged to ``OneHotEncoder``.
		raise_error_cols : iterable of column labels, optional
			Labels of the columns in which unknown categories are an error.
			``None`` (the default) or an empty iterable disables the check.
		**kwargs
			Keyword arguments forwarded to ``OneHotEncoder``.  A
			caller-supplied ``handle_unknown`` is overwritten with
			``'ignore'``.
		"""

		def __init__(self, *args, raise_error_cols=None, **kwargs):
			# The selective check in `transform` replaces the parent's global
			# one, so the parent must never raise on unknown values itself.
			kwargs['handle_unknown'] = 'ignore'
			# Copy into a fresh list so later changes to the caller's object
			# cannot alter this encoder; any iterable works.
			self.raise_error_cols = (
				[] if raise_error_cols is None else list(raise_error_cols))
			super().__init__(*args, **kwargs)

		def check_cols(self, X):
			"""Validate ``raise_error_cols`` against ``X`` and record its columns.

			Stores ``X.columns`` (or ``None`` when ``X`` has no such
			attribute) on ``self.columns``.

			Raises
			------
			ValueError
				If ``raise_error_cols`` is non-empty and ``X`` has no
				``columns`` attribute, or if any requested label is missing
				from ``X.columns``.
			"""
			columns = getattr(X, 'columns', None)

			if self.raise_error_cols:
				if columns is None:
					msg = ("`raise_error_cols` was given but `X` has no "
						"`columns` attribute to look the names up in")
					raise ValueError(msg)

				if any(c not in columns for c in self.raise_error_cols):
					msg = ("One or more column names are incorrect. "
						"Please check the column names passed "
						"to the `raise_error_cols` argument")
					raise ValueError(msg)

			self.columns = columns

		def fit(self, X, y=None):
			"""Check the configured columns, then fit the parent encoder.

			``y`` is not used by this class; it is accepted and passed
			through so that ``fit(X, y)`` calls work.
			"""
			self.check_cols(X)
			return super().fit(X, y)

		def transform(self, X):
			"""Encode ``X``, raising on unknown values in the selected columns.

			Each column in ``raise_error_cols`` is located by the position
			its label had in the data given to :meth:`fit`, and its values
			are compared with ``categories_`` at that position.  The check
			is skipped when ``X`` converts to an array with fewer than two
			dimensions.

			Raises
			------
			ValueError
				If a selected column contains a value that is not among the
				fitted categories for that column.
			"""
			X_ = np.asarray(X)  # read-only use, so no copy is needed
			if X_.ndim > 1:
				for c in self.raise_error_cols:
					idx = self.columns.get_loc(c)
					values = X_[:, idx]
					known = self.categories_[idx]

					if not np.isin(values, known).all():
						# str() each item so non-string categories can be
						# joined into the message.
						cats = ','.join(
							str(v) for v in np.setdiff1d(values, known))
						msg = ("Found unknown categories {0} in column {1}"
							" during transform".format(cats, c))
						raise ValueError(msg)

			return super().transform(X)

		def fit_transform(self, X, y=None):
			"""Check the configured columns, then fit and encode ``X``.

			``y`` is not used by this class; it is accepted and passed
			through so that ``fit_transform(X, y)`` calls work.
			"""
			self.check_cols(X)
			return super().fit_transform(X, y)