Created
November 6, 2022 16:56
-
-
Save muhammadanas0716/bdfbff903e3dad2915b84c3278b9aff3 to your computer and use it in GitHub Desktop.
Using Pipelines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Using Pipelines: chain a missing-value imputer and a classifier so the same
# preprocessing is applied at fit time and at predict time.
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# Toy training set: two numeric features (feature1 has one missing value)
# and a string class label.
train = pd.DataFrame({
    'feature1': [10, 20, np.nan, 2],
    'feature2': [25., 20, 5, 3],
    'label': ['A', 'A', 'B', 'B'],
})

# Toy test set: same feature columns (feature2 has one missing value) and no
# label column -- the label is what we want to predict.
test = pd.DataFrame({
    'feature1': [30., 5, 15],
    'feature2': [12, 10, np.nan],
})

train[:1]  # One row of the training set (displayed when run in a notebook)
test[:1]  # One row of the test set; we have to predict the label

imputer = SimpleImputer()  # Initiate the imputer with its default settings
clf = LogisticRegression()  # Initiate the ML model

# Pipeline: impute missing values, then pass the results to the classifier
pipe = make_pipeline(imputer, clf)

features = ['feature1', 'feature2']
X, y = train[features], train['label']
X_new = test[features]

# Pipeline applies the imputer to X before fitting the classifier
pipe.fit(X, y)

# Pipeline applies the imputer to X_new before making predictions
pipe.predict(X_new)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment