Last active
October 12, 2023 12:53
-
-
Save jantrienes/13c53b841cdb98f3aaaf5f7147df7a23 to your computer and use it in GitHub Desktop.
Use eli5 to show weights of an sklearn pipeline with ColumnTransformer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import eli5
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
# Example: explain a classifier that sits behind a ColumnTransformer with eli5.
#
# The pipeline combines a bag-of-words representation of the raw text with one
# extra numeric feature (document length), fits a logistic regression on top,
# and then shows the classifier's largest weights with readable feature names.

# Load the 20 newsgroups documents and wrap them in a DataFrame so the
# ColumnTransformer below can select its inputs by column name.
X, y = fetch_20newsgroups(return_X_y=True)
X = pd.DataFrame(X, columns=['text'])
y = pd.Series(y)

# Extra numeric feature: number of characters in each document.
X['text_len'] = X['text'].str.len()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y)

pipe = Pipeline([
    ('features', ColumnTransformer([
        # A string column selector hands CountVectorizer the text column as
        # 1-D data; the list selector hands MinMaxScaler a 2-D frame.
        ('bow', CountVectorizer(), 'text'),
        ('text_len', MinMaxScaler(), ['text_len']),
    ], remainder='drop')),
    ('clf', LogisticRegression()),
])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

# Look the *fitted* vectorizer up by name; the estimators passed to the
# ColumnTransformer constructor above are not the fitted ones.
vec = pipe.named_steps['features'].named_transformers_['bow']

# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; get_feature_names_out() is its replacement. Fall back to the
# old method so the example still runs on older installations.
if hasattr(vec, 'get_feature_names_out'):
    bow_names = vec.get_feature_names_out()
else:
    bow_names = vec.get_feature_names()

# The feature names are assembled by hand: the vocabulary first, then the
# scaled length column, in the same order as the transformers are listed in
# the ColumnTransformer. get_feature_names_out() returns an array, so it is
# converted to a list before concatenating.
feature_names = list(bow_names) + ['text_len']

# Show weights of the classifier (the 'clf' step of the pipeline).
eli5.show_weights(pipe.named_steps['clf'], feature_names=feature_names, top=10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks!