Skip to content

Instantly share code, notes, and snippets.

Last active Aug 12, 2019
What would you like to do?
SKLearn Pipeline with ELI5
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import VectorizerMixin
import eli5
from eli5.lime import TextExplainer
# wrapping up my GPT-2 vectorizing code
class V(VectorizerMixin):
    """Vectorizer that turns each text into the mean of its per-token vectors.

    Uses the module-level ``tokenizer`` and ``model`` objects to encode a
    text and obtain one vector per token, then averages those vectors
    component-wise so every input row becomes a single fixed-length vector.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is no state to learn."""
        return self

    def transform(self, X):
        """Return a NumPy array holding one averaged vector per row of ``X``.

        Args:
            X: iterable of texts; each one is passed to ``tokenizer.encode``.

        Returns:
            ``np.array`` built from the list of per-row averaged vectors.
        """
        xout = []
        for row in X:
            input_ids = torch.tensor([tokenizer.encode(row)])
            # assumes model(...)[0][0] is the sequence of per-token vectors
            # for the single encoded text — TODO confirm against the model.
            words = model(input_ids)[0][0]
            word_count = float(len(words))

            # Component-wise running sums over all token vectors.
            average_word_vector = []
            for word in words:
                for index, word_block in enumerate(word):
                    # Grow the accumulator the first time a component is
                    # seen; indexing it before appending raised IndexError.
                    if index == len(average_word_vector):
                        average_word_vector.append(0.0)
                    average_word_vector[index] += float(word_block)

            # Turn the sums into means. Nothing is divided when there were
            # no tokens, because the accumulator is then empty.
            average_word_vector = [
                total / word_count for total in average_word_vector
            ]

            # Collect the row; without this the result was always empty.
            xout.append(average_word_vector)
        return np.array(xout)
# Train the model via this pipeline. The vectorizer goes first: every pipeline
# step except the last must be a transformer, and the classifier has to be the
# final step for pipe.predict_proba to be available.
# NOTE(review): the original line was garbled (unbalanced parentheses, the
# fit call and its first argument missing). `X` is assumed to be the sequence
# of raw texts aligned with `y` — confirm the actual variable name.
pipe = make_pipeline(V(), LogisticRegressionCV())
pipe.fit(X[:testcutoff], y[:testcutoff])

# ELI5 TextExplainer: fit a local explanation for one text, using the
# pipeline's probability output as the black-box classifier.
# NOTE(review): the document argument was lost in the original; the first
# held-out text is assumed here — replace with the text you want explained.
te = TextExplainer(random_state=101)
te.fit(X[testcutoff], pipe.predict_proba)
te.show_prediction(target_names=['known weird', 'less weird'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment