Skip to content

Instantly share code, notes, and snippets.

@nsadeh
Created October 28, 2018 22:00
Show Gist options
  • Save nsadeh/796d6659c8d08a3850637b4d6ea8bfbd to your computer and use it in GitHub Desktop.
Save nsadeh/796d6659c8d08a3850637b4d6ea8bfbd to your computer and use it in GitHub Desktop.
class NumericColumns(TransformerMixin):
    """Keep the candidate columns that convert to float and are not constant.

    ``fit`` narrows ``self.columns`` to the columns of the training frame
    that can be cast to float and whose standard deviation is strictly
    positive.  ``transform`` returns those columns as a float array.
    """

    def __init__(self, columns=None):
        """Store the candidate column names.

        Args:
            columns: Iterable of column names to consider.  The original
                snippet left this list as a placeholder to be filled in;
                it defaults to an empty list so that no-argument
                construction keeps working.
        """
        # Copy so that rebinding/filtering never touches the caller's list.
        self.columns = [] if columns is None else list(columns)

    def fit(self, X, y=None):
        """Select the columns of ``X`` that are numeric with non-zero spread.

        Args:
            X: DataFrame containing every name in ``self.columns``.
            y: Ignored.  Accepted so the transformer can be called as
                ``fit(X, y)``, which is how FeatureUnion invokes it.

        Returns:
            self
        """
        kept = []
        for col in self.columns:
            try:
                as_float = X[col].astype('float')
            except (ValueError, TypeError):
                # Column holds values that cannot be parsed as floats.
                continue
            # A NaN std (e.g. all-missing column) compares False and is
            # skipped, exactly as the original assertion did.
            if as_float.std() > 0:
                kept.append(col)
        # NOTE: this overwrites the candidate list, so a later re-fit only
        # considers columns that survived the previous fit.
        self.columns = kept
        return self

    def transform(self, X):
        """Return the selected columns of ``X`` as a float ndarray."""
        # Cast the whole selection at once instead of assigning column by
        # column through .loc, which writes into a slice of X and may keep
        # the original (object) dtype.
        return X.loc[:, self.columns].astype('float').values
class LowVarianceNumerical(TransformerMixin):
    """Turn constant numeric columns into missing-value indicators.

    ``fit`` narrows ``self.columns`` to the columns that convert to float
    and have a standard deviation of exactly zero.  ``transform`` replaces
    each value in those columns with 1 where it is NaN and 0 otherwise.
    """

    def __init__(self, columns=None):
        """Store the candidate column names.

        Args:
            columns: Iterable of column names to consider.  The original
                snippet left this list as a placeholder to be filled in;
                it defaults to an empty list so that no-argument
                construction keeps working.
        """
        # Copy so that rebinding/filtering never touches the caller's list.
        self.columns = [] if columns is None else list(columns)

    def fit(self, X, y=None):
        """Select the columns of ``X`` that are numeric with zero spread.

        Args:
            X: DataFrame containing every name in ``self.columns``.
            y: Ignored.  Accepted so the transformer can be called as
                ``fit(X, y)``, which is how FeatureUnion invokes it.

        Returns:
            self
        """
        kept = []
        for col in self.columns:
            try:
                as_float = X[col].astype('float')
            except (ValueError, TypeError):
                # Column holds values that cannot be parsed as floats.
                continue
            # A NaN std compares False and is skipped, exactly as the
            # original assertion did.
            if as_float.std() == 0:
                kept.append(col)
        # NOTE: this overwrites the candidate list, so a later re-fit only
        # considers columns that survived the previous fit.
        self.columns = kept
        return self

    def transform(self, X):
        """Return a 0/1 integer ndarray flagging NaNs in the selected columns."""
        as_float = X.loc[:, self.columns].astype('float')
        # Vectorised NaN test on the whole selection; avoids the per-value
        # Python loop and the write-back into a slice of X.
        return np.isnan(as_float.values).astype(int)
class CategoricalColumns(TransformerMixin):
    """Label-encode, then one-hot encode, a set of categorical columns.

    Each column is cast to ``str`` and mapped to integer codes by its own
    ``LabelEncoder``; the resulting integer frame is passed to a single
    ``OneHotEncoder``.
    """

    def __init__(self, columns=None):
        """Store the column names and create the (unfitted) encoders.

        Args:
            columns: Iterable of column names to encode.  The original
                snippet left this list as a placeholder to be filled in;
                it defaults to an empty list so that no-argument
                construction keeps working.
        """
        self.columns = [] if columns is None else list(columns)
        # One LabelEncoder per entry of self.columns, in the same order.
        self.label_enc = []
        self.oh_enc = OneHotEncoder()

    def fit(self, X, y=None):
        """Fit one label encoder per column and the one-hot encoder.

        Args:
            X: DataFrame containing every name in ``self.columns``.
            y: Ignored.  Accepted so the transformer can be called as
                ``fit(X, y)``, which is how FeatureUnion invokes it.

        Returns:
            self
        """
        # Work on an explicit copy so the caller's frame is never written to.
        encoded = X[self.columns].copy()
        # Build a fresh list: appending to self.label_enc would leave the
        # encoders of a previous fit at the front, and zip() in
        # _labelencode would keep using those stale ones.
        encoders = []
        for col in self.columns:
            enc = LabelEncoder()
            encoded[col] = enc.fit_transform(encoded[col].astype('str').values)
            encoders.append(enc)
        self.label_enc = encoders
        self.oh_enc.fit(encoded)
        return self

    def _labelencode(self, X):
        """Return a copy of the selected columns with values replaced by codes.

        Raises:
            NotFittedError: If a column's label encoder has not been fitted.
        """
        encoded = X[self.columns].copy()
        for col, enc in zip(self.columns, self.label_enc):
            # The fitted attribute of LabelEncoder is ``classes_``.
            check_is_fitted(enc, 'classes_')
            encoded[col] = enc.transform(encoded[col].astype('str').values)
        return encoded

    def transform(self, X):
        """Return the one-hot encoding of the selected columns of ``X``."""
        return self.oh_enc.transform(self._labelencode(X))
# FeatureUnion expects transformer *instances*; passing the classes
# themselves makes fit() call the methods without a bound ``self``.
f = FeatureUnion([('numerical', NumericColumns()),
                  ('lowvar', LowVarianceNumerical()),
                  ('cat', CategoricalColumns())])
f.fit(test_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment