Created
January 28, 2015 20:46
-
-
Save jseabold/7477faccd7d42b7de5de to your computer and use it in GitHub Desktop.
sklearn transformers that can account for categorical variables
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.base import TransformerMixin, BaseEstimator | |
class StandardTransformer(BaseEstimator, TransformerMixin):
    """Standardize (zero mean, unit sample variance) selected columns.

    Columns whose identifier appears in ``ignore`` (e.g. categorical
    indicator columns) are passed through unchanged; every other column
    named in ``variables`` is centered and scaled.

    Parameters
    ----------
    variables : sequence, optional
        One identifier per column of ``X``, in column order.
    ignore : sequence, optional
        Subset of ``variables`` whose columns are left untouched.

    Attributes
    ----------
    mean_ : ndarray
        Per-column means of the transformed columns, set by ``fit``.
    std_ : ndarray
        Per-column standard deviations (``ddof=1``), set by ``fit``.
    """

    def __init__(self, variables=None, ignore=None):
        # Use None sentinels: mutable list defaults would be shared across
        # every instance of the class (classic Python pitfall).
        self.variables = [] if variables is None else variables
        self.ignore = [] if ignore is None else ignore
        # Boolean mask over columns: True where the column is transformed.
        self.transform_idx = np.asarray(
            [v not in self.ignore for v in self.variables])

    def fit(self, X, y=None):
        """Learn column means and sample (ddof=1) standard deviations."""
        cols = X[:, self.transform_idx]
        self.mean_ = cols.mean(axis=0)
        self.std_ = cols.std(axis=0, ddof=1)
        return self

    def transform(self, X, y=None):
        """Return a standardized copy of ``X``; ignored columns unchanged."""
        # Copy as float so integer input does not fail on the division
        # (the original in-place /= raised on integer dtypes).
        X = np.array(X, dtype=float)
        X[:, self.transform_idx] = (
            (X[:, self.transform_idx] - self.mean_) / self.std_)
        return X

    def inverse_transform(self, X, y=None):
        """Undo ``transform``: rescale and re-center the selected columns."""
        X = np.array(X, dtype=float)
        X[:, self.transform_idx] = (
            X[:, self.transform_idx] * self.std_ + self.mean_)
        return X

    def get_params(self, deep=True):
        """Return constructor parameters (sklearn estimator contract)."""
        return dict(variables=self.variables, ignore=self.ignore)
class MinMaxTransformer(StandardTransformer):
    """Scale selected columns to ``feature_range``; ignore the rest.

    Columns listed in ``ignore`` (e.g. categorical variables) pass
    through untouched; every other column named in ``variables`` is
    linearly mapped from its fitted ``[X_min, X_max]`` range onto
    ``feature_range``.

    Parameters
    ----------
    variables : sequence, optional
        One identifier per column of ``X``, in column order.
    ignore : sequence, optional
        Subset of ``variables`` whose columns are left untouched.
    feature_range : tuple of (min, max), optional
        Target range of the transformed columns. Default ``(0, 1)``.
    """

    def __init__(self, variables=None, ignore=None, feature_range=(0, 1)):
        # Stored as self.min / self.max so get_params can round-trip them.
        self.min, self.max = feature_range
        super(MinMaxTransformer, self).__init__(variables, ignore)

    def fit(self, X, y=None):
        """Record per-column minima and maxima of the selected columns."""
        # No defensive copy needed: min/max never modify X.
        cols = X[:, self.transform_idx]
        self.X_min = cols.min(axis=0)
        self.X_max = cols.max(axis=0)
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with selected columns scaled to range."""
        # Copy as float so integer input survives the division.
        # NOTE(review): a constant column (X_max == X_min) divides by
        # zero here, as in the original — callers must avoid that case.
        X = np.array(X, dtype=float)
        unit = (X[:, self.transform_idx] - self.X_min) / (self.X_max - self.X_min)
        X[:, self.transform_idx] = unit * (self.max - self.min) + self.min
        return X

    def inverse_transform(self, X, y=None):
        """Map scaled columns back onto their original data range."""
        X = np.array(X, dtype=float)
        unit = (X[:, self.transform_idx] - self.min) / (self.max - self.min)
        X[:, self.transform_idx] = unit * (self.X_max - self.X_min) + self.X_min
        return X

    def get_params(self, deep=True):
        """Return constructor parameters (sklearn estimator contract)."""
        return dict(variables=self.variables, ignore=self.ignore,
                    feature_range=(self.min, self.max))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment