Instantly share code, notes, and snippets.

Embed
What would you like to do?
Scikit-learn transformers that can account for categorical variables by leaving selected columns unscaled.
import numpy as np
from sklearn.base import TransformerMixin, BaseEstimator
class StandardTransformer(BaseEstimator, TransformerMixin):
    """Standardize (zero mean, unit sample variance) selected array columns.

    Columns whose labels appear in ``ignore`` (e.g. categorical variables)
    are passed through untouched; every other column named in ``variables``
    is scaled.  ``variables`` must list one label per column of ``X``, in
    column order, since it is turned into a positional boolean mask.

    Parameters
    ----------
    variables : sequence, optional
        Column labels of ``X``, one per column, in order.
    ignore : sequence, optional
        Labels of the columns to leave unchanged.
    """

    def __init__(self, variables=None, ignore=None):
        # None sentinels instead of mutable defaults: a shared `[]` default
        # would be the same list object across every instance constructed
        # without arguments, leaking state between them.
        self.variables = [] if variables is None else variables
        self.ignore = [] if ignore is None else ignore
        # Boolean column mask.  dtype=bool is explicit so that an empty
        # `variables` list still yields a valid (empty) boolean index —
        # np.asarray([]) alone would default to float64, which numpy
        # rejects as an index array.
        self.transform_idx = np.asarray(
            [v not in self.ignore for v in self.variables], dtype=bool)

    def fit(self, X, y=None):
        """Learn per-column mean and sample std (ddof=1) of the masked columns."""
        self.mean_ = X[:, self.transform_idx].mean(axis=0)
        self.std_ = X[:, self.transform_idx].std(axis=0, ddof=1)
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the selected columns standardized."""
        X = X.copy()
        X[:, self.transform_idx] -= self.mean_
        X[:, self.transform_idx] /= self.std_
        return X

    def inverse_transform(self, X, y=None):
        """Undo :meth:`transform` on a copy of ``X``."""
        X = X.copy()
        X[:, self.transform_idx] *= self.std_
        X[:, self.transform_idx] += self.mean_
        return X

    def get_params(self, deep=True):
        """Return constructor parameters (sklearn estimator protocol)."""
        return dict(variables=self.variables, ignore=self.ignore)
class MinMaxTransformer(StandardTransformer):
    """Scale selected columns to ``feature_range``; skip ignored columns.

    Same column-masking behavior as :class:`StandardTransformer`, but the
    selected columns are min-max scaled into ``feature_range`` instead of
    standardized.

    Parameters
    ----------
    variables : sequence, optional
        Column labels of ``X``, one per column, in order.
    ignore : sequence, optional
        Labels of the columns to leave unchanged.
    feature_range : tuple (min, max), default (0, 1)
        Target range of the transformed data.
    """

    def __init__(self, variables=None, ignore=None, feature_range=(0, 1)):
        self.min, self.max = feature_range
        super(MinMaxTransformer, self).__init__(variables, ignore)

    def fit(self, X, y=None):
        """Learn per-column min/max of the masked columns."""
        self.X_min = X[:, self.transform_idx].min(0)
        self.X_max = X[:, self.transform_idx].max(0)
        # Guard against constant (zero-range) columns, which would divide
        # by zero in transform().  Treating a zero range as 1 maps such a
        # column to self.min and makes inverse_transform round-trip it
        # exactly — the same convention sklearn's MinMaxScaler uses.
        data_range = self.X_max - self.X_min
        self.range_ = np.where(data_range == 0, 1.0, data_range)
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the selected columns scaled to range."""
        X = X.copy()
        X_std = (X[:, self.transform_idx] - self.X_min) / self.range_
        X[:, self.transform_idx] = X_std * (self.max - self.min) + self.min
        return X

    def inverse_transform(self, X, y=None):
        """Undo :meth:`transform` on a copy of ``X``."""
        X = X.copy()
        X_std = (X[:, self.transform_idx] - self.min) / (self.max - self.min)
        X[:, self.transform_idx] = X_std * self.range_ + self.X_min
        return X

    def get_params(self, deep=True):
        """Return constructor parameters (sklearn estimator protocol)."""
        return dict(variables=self.variables, ignore=self.ignore,
                    feature_range=(self.min, self.max))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment