Skip to content

Instantly share code, notes, and snippets.

@mapmeld
Last active Aug 12, 2019
Embed
What would you like to do?
SKLearn Pipeline with ELI5
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import VectorizerMixin
import eli5
from eli5.lime import TextExplainer
# wrapping up my GPT-2 vectorizing code
class V(VectorizerMixin):
    """Vectorizer that maps each text to the mean of its GPT-2 token vectors.

    The class keeps no state of its own. It relies on the module-level names
    ``tokenizer``, ``model``, ``torch`` and ``np`` being defined before
    ``transform`` is called.
    """

    def fit(self, X, y=None):
        """Return ``self`` unchanged; there is nothing to learn from the data."""
        return self

    def transform(self, X):
        """Encode every text in ``X`` as a single averaged feature vector.

        Args:
            X: Iterable of texts; each item is handed to ``tokenizer.encode``.

        Returns:
            ``np.ndarray`` built from one averaged vector per input text.
        """
        xout = []
        # Inference only: no need to track gradients for the forward passes.
        with torch.no_grad():
            for row in X:
                input_ids = torch.tensor([tokenizer.encode(row)])
                # First model output, first (only) batch entry; treated as a
                # 2-D tensor with one vector per token.
                # NOTE(review): presumably the last hidden state - confirm
                # against the model class in use.
                words = model(input_ids)[0][0]
                if len(words) == 0:
                    # No token vectors to average: keep an empty row instead
                    # of producing NaNs from a mean over zero elements.
                    xout.append([])
                    continue
                # Average over the token axis in float64, matching the
                # precision of accumulating Python floats one by one.
                average_word_vector = words.to(torch.float64).mean(dim=0)
                xout.append(average_word_vector.tolist())
        return np.array(xout)
# train the model via this pipeline
# Step order matters: the vectorizer must come first so the classifier is the
# final estimator. Every step before the last has to be a transformer, and
# pipe.predict_proba (used below) is provided by the final step.
pipe = make_pipeline(V(), LogisticRegressionCV())
# Fit on the leading `testcutoff` samples only.
pipe.fit(x[:testcutoff], y[:testcutoff])
# ELI5 TextExplainer
# Fixed random_state so repeated runs give the same explanation.
te = TextExplainer(random_state=101)
# Explain the pipeline's prediction for one text, using its probability
# function as the model being explained.
te.fit(tweetText, pipe.predict_proba)
# NOTE(review): target_names are presumably listed in the classifier's class
# order - confirm against the labels in `y`.
te.show_prediction(target_names=['known weird', 'less weird'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment