Skip to content

Instantly share code, notes, and snippets.

@philwinder
Last active February 3, 2019 10:46
Show Gist options
  • Save philwinder/3f6644b5c5b5040566b4b33d8df2e8e4 to your computer and use it in GitHub Desktop.
Save philwinder/3f6644b5c5b5040566b4b33d8df2e8e4 to your computer and use it in GitHub Desktop.
# Toy dataset: one string column (stored as a pandas categorical) and two
# float columns.
data = pd.DataFrame(
    {
        "pet": ["cat", "dog", "dog", "fish", "cat", "dog", "cat", "fish"],
        "children": [4.0, 6, 3, 3, 2, 3, 5, 4],
        "salary": [90.0, 24, 44, 27, 32, 59, 36, 27],
    }
).astype({"pet": "category"})
array([[ 1. , 0. , 0. , 0.20851441],
[ 0. , 1. , 0. , 1.87662973],
[ 0. , 1. , 0. , -0.62554324],
[ 0. , 0. , 1. , -0.62554324],
[ 1. , 0. , 0. , -1.4596009 ],
[ 0. , 1. , 0. , -0.62554324],
[ 1. , 0. , 0. , 1.04257207],
[ 0. , 0. , 1. , 0.20851441]])
# Column-wise preprocessing: binarize the 'pet' labels and standardize
# 'children'. 'pet' is selected with a bare string and 'children' with a
# one-element list; per the sklearn-pandas docs the former hands the
# transformer a 1-D array and the latter a 2-D array.
mapper = DataFrameMapper(
    features=[
        ("pet", preprocessing.LabelBinarizer()),
        (["children"], preprocessing.StandardScaler()),
    ],
)
# Fit on a copy so the original frame is left untouched.
mapper.fit_transform(data.copy())
# Pipeline branch for categorical features: keep only the columns whose
# dtype is "category", then one-hot encode them.
pipeline.make_pipeline(
    TypeSelector("category"),
    preprocessing.OneHotEncoder(),
)
array([[ 1. , 0. , 0. , 0.20851441, 2.27500192],
[ 0. , 1. , 0. , 1.87662973, -0.87775665],
[ 0. , 1. , 0. , -0.62554324, 0.07762474],
[ 0. , 0. , 1. , -0.62554324, -0.73444944],
[ 1. , 0. , 0. , -1.4596009 , -0.49560409],
[ 0. , 1. , 0. , -0.62554324, 0.79416078],
[ 1. , 0. , 0. , 1.04257207, -0.30452782],
[ 0. , 0. , 1. , 0.20851441, -0.73444944]])
# Branch 1: category-dtype columns -> one-hot encoding.
categorical_branch = pipeline.make_pipeline(
    TypeSelector("category"),
    preprocessing.OneHotEncoder(),
)
# Branch 2: numeric columns -> standard scaling.
numeric_branch = pipeline.make_pipeline(
    TypeSelector(np.number),
    preprocessing.StandardScaler(),
)
# Run both branches on the same input and concatenate their outputs
# column-wise.
pipe = pipeline.make_union(categorical_branch, numeric_branch)

# Fit on a copy of the frame, then convert the result to a dense array via
# .toarray() (presumably the union output is sparse here — confirm).
transformed = pipe.fit_transform(data.copy())
transformed.toarray()
from sklearn.base import BaseEstimator, TransformerMixin
class TypeSelector(BaseEstimator, TransformerMixin):
    """Transformer that keeps only the DataFrame columns of one dtype.

    Meant to sit at the head of a pipeline branch so that the following
    steps only receive columns of a single kind, e.g.
    ``TypeSelector("category")`` or ``TypeSelector(np.number)``.

    Parameters
    ----------
    dtype
        A dtype selector understood by
        ``pandas.DataFrame.select_dtypes(include=...)``.
    """

    def __init__(self, dtype):
        self.dtype = dtype

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the selector has no state to learn."""
        return self

    def transform(self, X):
        """Return the columns of ``X`` whose dtype matches ``self.dtype``.

        Raises
        ------
        TypeError
            If ``X`` is not a ``pandas.DataFrame``.
        """
        # Explicit check instead of ``assert``: assertions are stripped under
        # ``python -O``, which would let a non-DataFrame through and fail
        # later with a far less helpful AttributeError.
        if not isinstance(X, pd.DataFrame):
            raise TypeError(
                "TypeSelector expects a pandas DataFrame, got "
                f"{type(X).__name__}"
            )
        return X.select_dtypes(include=[self.dtype])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment