Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
def percentage_stacked_plot(columns_to_plot, super_title):
Prints a 100% stacked plot of the response variable for each independent variable in the list columns_to_plot.
columns_to_plot (list of string): Names of the variables to plot
super_title (string): Super title of the visualization
number_of_columns = 2
number_of_rows = math.ceil(len(columns_to_plot)/2)
# create a figure
fig = plt.figure(figsize=(12, 5 * number_of_rows))
fig.suptitle(super_title, fontsize=22, y=.95)
# loop over each column name to create a subplot
for index, column in enumerate(columns_to_plot, 1):
# create the subplot
ax = fig.add_subplot(number_of_rows, number_of_columns, index)
# calculate the percentage of observations of the response variable for each group of the independent variable
# 100% stacked bar plot
prop_by_independent = pd.crosstab(df_telco[column], df_telco['Churn']).apply(lambda x: x/x.sum()*100, axis=1)
prop_by_independent.plot(kind='bar', ax=ax, stacked=True,
rot=0, color=['springgreen','salmon'])
# set the legend in the upper right corner
ax.legend(loc="upper right", bbox_to_anchor=(0.62, 0.5, 0.5, 0.5),
title='Churn', fancybox=True)
# set title and labels
ax.set_title('Proportion of observations by ' + column,
fontsize=16, loc='left')
# eliminate the frame from the plot
spine_names = ('top', 'right', 'bottom', 'left')
for spine_name in spine_names:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment