Skip to content

Instantly share code, notes, and snippets.

@shinichi-takayanagi
Created November 13, 2021 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shinichi-takayanagi/7759ae02ff047f071ed63b8e70552d83 to your computer and use it in GitHub Desktop.
Save shinichi-takayanagi/7759ae02ff047f071ed63b8e70552d83 to your computer and use it in GitHub Desktop.
Pipeline, ColumnTransformer, Pandas
# See https://machinelearningmastery.com/columntransformer-for-numerical-and-categorical-data/
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn import set_config
import seaborn as sns
# Ask scikit-learn to show estimators as diagrams when they are displayed.
set_config(display="diagram")

# Load the penguins data and discard rows that have no target value.
target_column = "body_mass_g"
df = sns.load_dataset("penguins").dropna(subset=[target_column])

# Separate the regression target from the feature columns.
y = df[target_column]
X = df.drop(columns=target_column)

# Route each feature column to a transformer according to its dtype.
numerical_ix = X.select_dtypes(include=["int64", "float64"]).columns
categorical_ix = X.select_dtypes(include=["object", "bool"]).columns

# Numeric features: fill gaps with the median, then standardize.
numeric_transformer = Pipeline(
    steps=[
        ("impute", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical features: fill gaps with the literal "unknown", then one-hot
# encode; handle_unknown="ignore" keeps unseen categories from raising.
categorical_transformer = Pipeline(
    steps=[
        ("impute", SimpleImputer(strategy="constant", fill_value="unknown")),
        ("onehot2", OneHotEncoder(handle_unknown="ignore")),
    ]
)

preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numerical_ix),
        ("cat", categorical_transformer, categorical_ix),
    ]
)

# Build the overall pipeline: preprocessing followed by the regressor.
pipe = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("model", LinearRegression()),
    ]
)

# Bare expression so a notebook cell ending here displays the pipeline.
pipe

# Hold out a test split (seeded for reproducibility), fit on the training
# part only, and print the score on the held-out part.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
pipe.fit(X_train, y_train)
print(pipe.score(X_test, y_test))

# Inspect the parameters of the fitted linear model.
fitted_model = pipe.named_steps["model"]
print('Intercept: \n', fitted_model.intercept_)
print('Coefficients: \n', fitted_model.coef_)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment