Created
October 28, 2018 22:00
-
-
Save nsadeh/796d6659c8d08a3850637b4d6ea8bfbd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NumericColumns(TransformerMixin):
    """Keep the candidate columns that are numeric and have non-zero spread.

    ``fit`` narrows ``self.columns`` to the columns that can be cast to
    float and whose standard deviation is strictly positive; ``transform``
    returns those columns as a float array.
    """

    def __init__(self, columns=None):
        """Store the candidate column names.

        Args:
            columns: Iterable of column names to consider. Defaults to an
                empty list, in which case nothing is selected.
        """
        self.columns = list(columns) if columns is not None else []

    def fit(self, X, y=None):
        """Select the usable numeric columns of ``X``.

        Args:
            X: DataFrame containing every name in ``self.columns``.
            y: Ignored. Accepted because scikit-learn composite estimators
                such as ``FeatureUnion`` call ``fit(X, y)``.

        Returns:
            self, with ``self.columns`` replaced by the selected subset.
        """
        kept = []
        for col in self.columns:
            try:
                as_float = X[col].astype('float')
            except (ValueError, TypeError):
                # Not convertible to float: not a numeric column.
                continue
            # An explicit comparison instead of ``assert`` so the filter
            # still runs under ``python -O``. A NaN std compares False and
            # the column is skipped.
            if as_float.std() > 0:
                kept.append(col)
        self.columns = kept
        return self

    def transform(self, X):
        """Return the selected columns of ``X`` as a 2-D float array."""
        # One frame-level cast avoids assigning into a slice of ``X`` and
        # always yields a float array.
        return X.loc[:, self.columns].astype('float').values
class LowVarianceNumerical(TransformerMixin):
    """Turn constant numeric columns into missing-value indicator columns.

    ``fit`` narrows ``self.columns`` to the columns that can be cast to
    float and whose standard deviation is exactly zero; ``transform``
    returns, for each such column, 1 where the value is NaN and 0 elsewhere.
    """

    def __init__(self, columns=None):
        """Store the candidate column names.

        Args:
            columns: Iterable of column names to consider. Defaults to an
                empty list, in which case nothing is selected.
        """
        self.columns = list(columns) if columns is not None else []

    def fit(self, X, y=None):
        """Select the zero-variance numeric columns of ``X``.

        Args:
            X: DataFrame containing every name in ``self.columns``.
            y: Ignored. Accepted because scikit-learn composite estimators
                such as ``FeatureUnion`` call ``fit(X, y)``.

        Returns:
            self, with ``self.columns`` replaced by the selected subset.
        """
        kept = []
        for col in self.columns:
            try:
                as_float = X[col].astype('float')
            except (ValueError, TypeError):
                # Not convertible to float: not a numeric column.
                continue
            # An explicit comparison instead of ``assert`` so the filter
            # still runs under ``python -O``. A NaN std compares False and
            # the column is skipped.
            if as_float.std() == 0:
                kept.append(col)
        self.columns = kept
        return self

    def transform(self, X):
        """Return a 2-D integer array flagging NaN entries (1) vs. others (0)."""
        as_float = X.loc[:, self.columns].astype('float')
        # Vectorised NaN test; avoids assigning into a slice of ``X``.
        return as_float.isna().astype(int).values
class CategoricalColumns(TransformerMixin):
    """One-hot encode a set of columns after casting their values to str.

    Each column gets its own ``LabelEncoder`` (string -> integer code); the
    integer-coded frame is then passed through a shared ``OneHotEncoder``.
    """

    def __init__(self, columns=None):
        """Store the column names and create the (unfitted) encoders.

        Args:
            columns: Iterable of column names to encode. Defaults to an
                empty list.
        """
        self.columns = list(columns) if columns is not None else []
        # One fitted LabelEncoder per entry of ``self.columns``, same order.
        self.label_enc = []
        self.oh_enc = OneHotEncoder()

    def fit(self, X, y=None):
        """Fit the per-column label encoders and the one-hot encoder.

        Args:
            X: DataFrame containing every name in ``self.columns``.
            y: Ignored. Accepted because scikit-learn composite estimators
                such as ``FeatureUnion`` call ``fit(X, y)``.

        Returns:
            self
        """
        # Work on a copy so the caller's frame is never written to.
        encoded = X[self.columns].copy()
        # Start from scratch on every fit; appending to encoders left over
        # from an earlier fit would pair stale encoders with the columns.
        self.label_enc = []
        for col in self.columns:
            enc = LabelEncoder()
            encoded[col] = enc.fit_transform(encoded[col].astype('str').values)
            self.label_enc.append(enc)
        self.oh_enc.fit(encoded)
        return self

    def _labelencode(self, X):
        """Return a copy of the selected columns with values as integer codes.

        Raises:
            NotFittedError: If the number of fitted label encoders does not
                match the number of columns (``fit`` has not been run).
        """
        if len(self.label_enc) != len(self.columns):
            # Without this check ``zip`` would silently truncate and leave
            # some columns unencoded.
            raise NotFittedError(
                'CategoricalColumns must be fitted before transform.')
        encoded = X[self.columns].copy()
        for col, enc in zip(self.columns, self.label_enc):
            encoded[col] = enc.transform(encoded[col].astype('str').values)
        return encoded

    def transform(self, X):
        """Return the one-hot encoding of the selected columns of ``X``."""
        return self.oh_enc.transform(self._labelencode(X))
# Combine the three column-group transformers into a single feature matrix.
# FeatureUnion expects transformer *instances*: it calls fit/transform on
# each entry, which fails on the bare classes.
f = FeatureUnion([
    ('numerical', NumericColumns()),
    ('lowvar', LowVarianceNumerical()),
    ('cat', CategoricalColumns()),
])
f.fit(test_df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment