Skip to content

Instantly share code, notes, and snippets.

@tdomhan
Created September 8, 2013 18:16
Show Gist options
  • Save tdomhan/6487126 to your computer and use it in GitHub Desktop.
Save tdomhan/6487126 to your computer and use it in GitHub Desktop.
Plotting the largest weights of an L2-regularized classifier together with their feature names
import matplotlib.pyplot as plt
import numpy as np
# Plot the 100 largest-magnitude weights of a fitted linear classifier,
# labelling each point on the x axis with the name of its feature.
#
# Expected to be defined before this script runs:
#   clf        -- a fitted sklearn classifier exposing `coef_`,
#                 e.g. clf = LogisticRegression()
#   vectorizer -- the fitted sklearn vectorizer that produced the features,
#                 e.g. vectorizer = TfidfVectorizer()

# Default figure size; set through rcParams so the script does not depend on
# the `figsize` helper that only exists inside an IPython --pylab session.
plt.rcParams["figure.figsize"] = (20, 8)

# Flatten the coefficients to 1-D.
# NOTE(review): the flat indices below are used directly as vocabulary
# indices, so this assumes coef_ holds a single row of weights -- confirm
# before using with a multi-class model.
coef = clf.coef_.ravel()

# Indices of the 100 coefficients with the largest absolute value.
important = np.argsort(np.abs(coef))[-100:]

# Newer scikit-learn releases expose get_feature_names_out(); fall back to
# the older get_feature_names() when it is not available.
if hasattr(vectorizer, "get_feature_names_out"):
    vocab = vectorizer.get_feature_names_out()
else:
    vocab = vectorizer.get_feature_names()

important_feature_names = np.array([vocab[idx] for idx in important])
important_feature_values = coef[important]

# Order the selected features by signed weight, largest first.
inds = np.argsort(important_feature_values)[::-1]
important_feature_names = important_feature_names[inds]
important_feature_values = important_feature_values[inds]

positions = np.arange(len(important_feature_values))

plt.ylabel("weight")
ax = plt.gca()
ax.set_xticks(positions)
labels = ax.set_xticklabels(important_feature_names)
# Rotate the feature names so that neighbouring labels do not overlap.
plt.setp(labels, rotation=90)

# Colour each point by its weight using the "autumn" colormap.
plt.scatter(
    positions,
    important_feature_values,
    c=important_feature_values,
    marker="o",
    cmap=plt.cm.autumn,
)
# Parenthesised form prints the same text on Python 2 and Python 3.
print("done")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment