Skip to content

Instantly share code, notes, and snippets.

@jseabold
Created January 28, 2015 20:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jseabold/7477faccd7d42b7de5de to your computer and use it in GitHub Desktop.
Save jseabold/7477faccd7d42b7de5de to your computer and use it in GitHub Desktop.
sklearn transformers that can account for categorical variables
import numpy as np
from sklearn.base import TransformerMixin, BaseEstimator
class StandardTransformer(BaseEstimator, TransformerMixin):
    """Standardize selected columns of a 2-D array, leaving the rest as-is.

    Every column whose label appears in ``variables`` but not in ``ignore``
    is centered on its mean and divided by its sample standard deviation
    (``ddof=1``). Ignored columns pass through untouched.

    Parameters
    ----------
    variables : sequence
        One label per column of ``X``, in column order. The boolean mask
        built from it is used to index the columns of ``X``.
    ignore : sequence
        Labels from ``variables`` whose columns must not be transformed.

    Attributes
    ----------
    transform_idx : ndarray of bool
        Column mask; True where the column is transformed.
    mean_ : ndarray
        Per-column mean of the transformed columns, set by ``fit``.
    std_ : ndarray
        Per-column scale of the transformed columns, set by ``fit``. A
        standard deviation of exactly zero is stored as 1.
    """

    # Immutable defaults: the arguments are stored on the instance as-is, so
    # a shared list default could be mutated through one instance and leak
    # into every other instance created with the defaults.
    def __init__(self, variables=(), ignore=()):
        self.variables = variables
        self.ignore = ignore
        self.transform_idx = self._column_mask()

    def _column_mask(self):
        """Return the boolean mask of columns that are not ignored."""
        # dtype=bool keeps an empty ``variables`` usable as an index; numpy
        # would otherwise infer float64 for an empty list and refuse it.
        return np.array(
            [name not in self.ignore for name in self.variables],
            dtype=bool,
        )

    @staticmethod
    def _working_copy(X):
        """Return a copy of ``X`` that can hold fractional results."""
        # In-place float arithmetic on a bool/integer array raises a casting
        # error, so those dtypes are promoted to float (astype copies).
        if X.dtype.kind in "biu":
            return X.astype(float)
        return X.copy()

    def fit(self, X, y=None):
        """Compute the per-column mean and scale of the selected columns.

        Parameters
        ----------
        X : ndarray
            2-D data array; it is not modified.
        y : unused

        Returns
        -------
        self
        """
        # Rebuild the mask so that ``variables`` / ``ignore`` changed after
        # construction (e.g. through ``set_params``) are honoured.
        self.transform_idx = self._column_mask()
        selected = X[:, self.transform_idx]
        self.mean_ = selected.mean(axis=0)
        spread = selected.std(axis=0, ddof=1)
        # A constant column has zero spread; dividing by it would give
        # 0/0 = NaN. A scale of 1 maps such a column to all zeros and
        # still lets inverse_transform recover the original values.
        self.std_ = np.where(spread == 0, 1.0, spread)
        return self

    def transform(self, X, y=None):
        """Return a standardized copy of ``X``; ``X`` itself is untouched."""
        X = self._working_copy(X)
        X[:, self.transform_idx] -= self.mean_
        X[:, self.transform_idx] /= self.std_
        return X

    def inverse_transform(self, X, y=None):
        """Return a copy of ``X`` with the standardization undone."""
        X = self._working_copy(X)
        X[:, self.transform_idx] *= self.std_
        X[:, self.transform_idx] += self.mean_
        return X

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is unused)."""
        return dict(variables=self.variables, ignore=self.ignore)
class MinMaxTransformer(StandardTransformer):
    """Rescale selected columns of a 2-D array to a target range.

    Every column whose label appears in ``variables`` but not in ``ignore``
    is mapped linearly so that the minimum seen in ``fit`` lands on
    ``feature_range[0]`` and the maximum on ``feature_range[1]``. Ignored
    columns pass through untouched.

    Parameters
    ----------
    variables : sequence
        One label per column of ``X``, in column order.
    ignore : sequence
        Labels from ``variables`` whose columns must not be transformed.
    feature_range : pair (min, max)
        Target range of the transformed columns.

    Attributes
    ----------
    X_min, X_max : ndarray
        Per-column minimum and maximum of the selected columns, set by
        ``fit``.
    """

    # Immutable defaults: the arguments are stored on the instance as-is, so
    # a shared list default could be mutated through one instance.
    def __init__(self, variables=(), ignore=(), feature_range=(0, 1)):
        # Keep the argument object itself: scikit-learn's ``clone`` checks
        # that ``get_params`` hands back the very object that was passed
        # in, and ``set_params`` assigns to the attribute of this name.
        self.feature_range = feature_range
        super(MinMaxTransformer, self).__init__(variables, ignore)

    # ``min`` / ``max`` remain available as attributes but are now views on
    # ``feature_range``, so the two can never disagree.
    @property
    def min(self):
        """Lower bound of the target range."""
        return self.feature_range[0]

    @min.setter
    def min(self, value):
        self.feature_range = (value, self.feature_range[1])

    @property
    def max(self):
        """Upper bound of the target range."""
        return self.feature_range[1]

    @max.setter
    def max(self, value):
        self.feature_range = (self.feature_range[0], value)

    def fit(self, X, y=None):
        """Record the per-column minimum and maximum of the selected columns.

        ``X`` is only read, so no copy is made. ``y`` is unused.
        Returns ``self``.
        """
        # Rebuild the mask so that ``variables`` / ``ignore`` changed after
        # construction (e.g. through ``set_params``) are honoured.
        self.transform_idx = np.array(
            [name not in self.ignore for name in self.variables],
            dtype=bool,
        )
        selected = X[:, self.transform_idx]
        self.X_min = selected.min(0)
        self.X_max = selected.max(0)
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the selected columns rescaled."""
        # Assigning float results into a bool/integer array would silently
        # truncate them, so those dtypes are promoted first (astype copies).
        X = X.astype(float) if X.dtype.kind in "biu" else X.copy()
        low, high = self.feature_range
        span = self.X_max - self.X_min
        # A constant column has zero span; dividing by it would give NaN.
        # Using 1 instead maps such a column onto the lower bound.
        span = np.where(span == 0, 1, span)
        unit = (X[:, self.transform_idx] - self.X_min) / span
        X[:, self.transform_idx] = unit * (high - low) + low
        return X

    def inverse_transform(self, X, y=None):
        """Return a copy of ``X`` with the rescaling undone."""
        X = X.astype(float) if X.dtype.kind in "biu" else X.copy()
        low, high = self.feature_range
        unit = (X[:, self.transform_idx] - low) / (high - low)
        # The true (possibly zero) span is used here, so a constant column
        # is restored to its fitted value.
        X[:, self.transform_idx] = (
            unit * (self.X_max - self.X_min) + self.X_min
        )
        return X

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is unused)."""
        return dict(variables=self.variables, ignore=self.ignore,
                    feature_range=self.feature_range)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment