Skip to content

Instantly share code, notes, and snippets.

@rikturr
Last active December 8, 2020 19:35
Show Gist options
  • Save rikturr/207a5e0dc522c76169d872b2c5d20071 to your computer and use it in GitHub Desktop.
Save rikturr/207a5e0dc522c76169d872b2c5d20071 to your computer and use it in GitHub Desktop.
dask grid search
from dask_ml.compose import ColumnTransformer
from dask_ml.preprocessing import StandardScaler, DummyEncoder, Categorizer
from dask_ml.model_selection import GridSearchCV
# Dask has slightly different way of one-hot encoding
pipeline = Pipeline(steps=[
('categorize', Categorizer(columns=categorical_feat)),
('onehot', DummyEncoder(columns=categorical_feat)),
('scale', ColumnTransformer(
transformers=[('num', StandardScaler(), numeric_feat)],
remainder='passthrough',
)),
('clf', ElasticNet(normalize=False, max_iter=100)),
])
# `params` is the same parameter grid as above; it is defined outside this
# snippet. The search uses 3-fold cross-validation and ranks candidates by
# negative mean squared error.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=params,
    cv=3,
    scoring='neg_mean_squared_error',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment