Skip to content

Instantly share code, notes, and snippets.

@DFoly
Last active April 17, 2021 14:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DFoly/77d0fa1270344aa9353236d14d318d1d to your computer and use it in GitHub Desktop.
Save DFoly/77d0fa1270344aa9353236d14d318d1d to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
class CustomImputer(BaseEstimator, TransformerMixin):
    """Impute missing data for numerical features with the column mean.

    Parameters
    ----------
    variables : str, list of str or None, default=None
        Column label(s) to impute. A single label is wrapped in a
        one-element list. ``None`` means "every numeric column of the
        frame passed to ``fit``".

    Attributes
    ----------
    imputer_dict_ : dict
        Set by ``fit``; maps each imputed column label to the mean of
        that column in the fitting data.
    """

    def __init__(self, variables=None):
        # Keep None untouched so fit() can resolve it against the data;
        # wrapping it into [None] would make every fit() raise KeyError.
        if variables is None or isinstance(variables, list):
            self.variables = variables
        else:
            self.variables = [variables]

    def fit(self, X, y=None):
        """Learn the per-column means used to fill missing values.

        Parameters
        ----------
        X : pandas.DataFrame
            Data containing the columns to impute.
        y : ignored
            Accepted so the transformer can be used inside a Pipeline.

        Returns
        -------
        self
        """
        features = self.variables
        if features is None:
            features = X.select_dtypes(include="number").columns.tolist()
        self.imputer_dict_ = {
            feature: X[feature].mean() for feature in features
        }
        return self

    def transform(self, X):
        """Return a copy of ``X`` with missing values replaced by the means.

        Parameters
        ----------
        X : pandas.DataFrame
            Data containing the columns seen during ``fit``.

        Returns
        -------
        pandas.DataFrame
            A copy of ``X``; the input frame is not modified.
        """
        X = X.copy()
        for feature, fill_value in self.imputer_dict_.items():
            # Assign the result back instead of calling
            # X[feature].fillna(..., inplace=True): the chained in-place
            # form acts on a temporary Series and leaves X unchanged when
            # pandas Copy-on-Write is active.
            X[feature] = X[feature].fillna(fill_value)
        return X
# Generate a small synthetic data set: 10 samples, 4 features, 3 classes.
X, y = make_blobs(n_samples=10, centers=3, n_features=4,
                  random_state=0)
df = pd.DataFrame(X, columns=['X1', 'X2', 'X3', 'X4'])

# Add missing values to rows 2..7 of X1. A single .loc call writes to df
# itself (the end label is inclusive); the chained df['X1'].iloc[2:8] = ...
# form may assign to a temporary copy and leave df unchanged.
df.loc[2:7, 'X1'] = np.nan

# Labels of every column that contains at least one missing value.
missing_columns = df.columns[df.isnull().any()].tolist()

# Impute first, then standardise, then classify.
preprocessor = Pipeline(steps=[
    ('imputer', CustomImputer(missing_columns)),
    ('scaler', StandardScaler())])
lr = Pipeline(steps=[('preprocessor', preprocessor),
                     ('classifier', LogisticRegression())])
lr.fit(df, y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment