jmquintana79/pipeline_template_scikit.py

## pipeline_template_scikit.py
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
# example models and preprocessors
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression

# X, y: placeholders for the feature table and the target. They are not
# defined anywhere in this template and must exist before the final fit call.

# Column groups routed to each preprocessing branch
numerical_features = ['age', 'income']
categorical_features = ['gender', 'occupation']

# Numerical branch: median imputation followed by standard scaling
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: most-frequent imputation followed by one-hot encoding;
# handle_unknown='ignore' keeps transform from raising on categories that
# were not seen during fit
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Send each column group through its own branch. Columns that are not listed
# here fall under ColumnTransformer's default remainder='drop'.
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

# Chain preprocessing and the classifier into a single estimator
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
])

# Fit the preprocessing steps and the model in one call
pipeline.fit(X, y)
	import pandas as pd
	import numpy as np
	from sklearn.pipeline import Pipeline
	from sklearn.compose import ColumnTransformer
	# example models and preprocessors
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.impute import SimpleImputer
	from sklearn.linear_model import LogisticRegression

	# X, y: placeholders for the feature table and the target. They are not
	# defined anywhere in this template and must exist before the final fit call.

	# Column groups routed to each preprocessing branch
	numerical_features = ['age', 'income']
	categorical_features = ['gender', 'occupation']

	# Numerical branch: median imputation followed by standard scaling
	numerical_transformer = Pipeline([
		('imputer', SimpleImputer(strategy='median')),
		('scaler', StandardScaler()),
	])

	# Categorical branch: most-frequent imputation followed by one-hot encoding;
	# handle_unknown='ignore' keeps transform from raising on categories that
	# were not seen during fit
	categorical_transformer = Pipeline([
		('imputer', SimpleImputer(strategy='most_frequent')),
		('onehot', OneHotEncoder(handle_unknown='ignore')),
	])

	# Send each column group through its own branch. Columns that are not listed
	# here fall under ColumnTransformer's default remainder='drop'.
	preprocessor = ColumnTransformer([
		('num', numerical_transformer, numerical_features),
		('cat', categorical_transformer, categorical_features),
	])

	# Chain preprocessing and the classifier into a single estimator
	pipeline = Pipeline([
		('preprocessor', preprocessor),
		('classifier', LogisticRegression()),
	])

	# Fit the preprocessing steps and the model in one call
	pipeline.fit(X, y)