Skip to content

Instantly share code, notes, and snippets.

@kayitt
Created April 6, 2020 17:52
Show Gist options
  • Save kayitt/bf8a99d064e4e0306364ab39647f6e75 to your computer and use it in GitHub Desktop.
Save kayitt/bf8a99d064e4e0306364ab39647f6e75 to your computer and use it in GitHub Desktop.
Feature importance for logistic regression
"""Plot per-class feature importances for a logistic-regression text classifier.

For every class, draw a horizontal bar chart of the 10 features with the
largest coefficients ("in favor", green) followed by the 5 features with the
smallest coefficients ("against", red).
"""
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import numpy as np

# BUG FIX: the original built a bare LogisticRegression() and then accessed
# model.named_steps.tfidf / .classifier, which only exists on a Pipeline.
# The step names below match the attribute names the original code used.
model = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('classifier', LogisticRegression()),
])
# model.fit(...)

# Map each vocabulary term to its per-class coefficient vector.
# coef_.T has shape (n_features, n_classes), so each dict value is one row
# of per-class weights for that feature.
# NOTE: get_feature_names() was removed in scikit-learn 1.2; use
# get_feature_names_out() instead.
my_dict = dict(zip(model.named_steps['tfidf'].get_feature_names_out(),
                   model.named_steps['classifier'].coef_.T))
coefs = pd.DataFrame.from_dict(my_dict, orient='index')
coefs.columns = model.named_steps['classifier'].classes_

for category in coefs.columns:
    # features "in favor" are those with the largest coefficients
    vals = list(coefs[category].nlargest(10).values) + list(
        coefs[category].nsmallest(5).sort_values(ascending=False).values)
    # features "against" are those with the smallest coefficients
    names = list(coefs[category].nlargest(10).index) + list(
        coefs[category].nsmallest(5).sort_values(ascending=False).index)
    # features "in favour" of the category are colored green, those "against" red
    colors = ['green' if x > 0 else 'red' for x in vals]
    # reverse so barh draws the strongest feature at the top of the chart
    vals.reverse()
    names.reverse()
    fig = plt.figure(figsize=(15, 10))
    pos = np.arange(len(vals)) + .5
    plt.barh(pos, vals, align='center', color=colors)
    plt.yticks(pos, names)
    title = f'Local explanation for class {category}'
    plt.title(title)
    plt.show()
@gosia-malgosia
Copy link

gosia-malgosia commented Apr 6, 2020

Or for more generic cases:

# Build a DataFrame of model coefficients: one row per feature, one column per class.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# you need three things:
# NOTE(review): `my_tfidf` and `my_OvR_classifier` are placeholders — substitute
# your own fitted vectorizer and one-vs-rest classifier before running.
my_feature_names = my_tfidf.get_feature_names() # list of feature names (len = number_of_features)
my_weights = my_OvR_classifier.coef_.T # the classifier must be OvR and of the shape
                                       # equal to (number_of_features x number_of_classes)
my_labels = my_OvR_classifier.classes_ # or if you used LabelEncoder: label_encoder_obj.classes_
# and then you put them into a df:
my_dict = dict(zip(my_feature_names, my_weights))
coef_df = pd.DataFrame.from_dict(my_dict, orient='index')
coef_df.columns = my_labels
# coef_df is now the full table of per-feature, per-label weights
# to print a chart for selected labels, use the function below;
# example call: show_features(['cats', 'dogs'], 20)
def show_features(category_list, n):
    """Plot the most influential features for each class in *category_list*.

    For each category, draws a horizontal bar chart of the *n* features with
    the largest coefficients in ``coef_df`` (green, "in favor") followed by
    the 5 features with the smallest coefficients (red, "against"). To skip
    the negative features, remove the parts starting with
    ``+ list(coef_df[category].nsmallest(5)``.

    Parameters:
        category_list: list of column labels present in ``coef_df``,
            e.g. ``['cats', 'dogs']``.
        n: number of top (largest-coefficient) features per chart.

    Returns:
        The matplotlib Figure of the last chart drawn, or ``None`` when
        *category_list* is empty.
    """
    # BUG FIX: `fig` was previously unbound, so an empty category_list made
    # the final `return fig` raise UnboundLocalError.
    fig = None
    for category in category_list:
        # taller figure for more bars
        fig = plt.figure(figsize=(10, n / 4))
        # top-n positive features, then the 5 most negative ones
        vals = list(coef_df[category].nlargest(n).values) + list(
            coef_df[category].nsmallest(5).sort_values(ascending=False).values)
        names = list(coef_df[category].nlargest(n).index) + list(
            coef_df[category].nsmallest(5).sort_values(ascending=False).index)
        # reverse so barh draws the strongest feature at the top of the chart
        vals.reverse()
        names.reverse()
        colors = ['green' if x > 0 else 'red' for x in vals]
        pos = np.arange(len(vals)) + .5
        plt.barh(pos, vals, align='center', color=colors)
        plt.yticks(pos, names)
        title = f'Local explanation for class {category}'
        plt.title(title)
        plt.show()
    return fig

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment