Created
September 12, 2016 02:49
-
-
Save quizzicol/b96ab6b3129e25a0ba47bb69237fdb48 to your computer and use it in GitHub Desktop.
Pandas Facetplot histograms for categorical data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Facet plot of per-column value counts: one bar chart for every column of
# `df` except 'Customer ID'. Counting distinct values (rather than binning)
# lets the same code handle numeric and categorical columns.
# Relies on `df` and `plt` being defined earlier
# (presumably a pandas DataFrame and matplotlib.pyplot -- confirm).

# which columns from the data frame to plot
# (boolean mask on the column index; avoids the removed `DataFrame.ix` indexer)
columns_to_plot = df.columns[df.columns != 'Customer ID']

# facet plot dimensions
plot_columns = 5
# integer ceiling division, so the result is an int on every Python version;
# keep at least one row so the figure can still be created with nothing to plot
plot_rows = max(1, -(-len(columns_to_plot) // plot_columns))

# bars are drawn from x to x + bar_width, so labels sit at x + bar_width / 2
bar_width = 0.8

# create figure
# squeeze=False keeps `ax` two-dimensional even when there is only one row
fig, ax = plt.subplots(plot_rows, plot_columns, figsize=(15, 20), squeeze=False)
fig.subplots_adjust(hspace=0.6)  # adjust vertical spacing between plots

# iterate through columns and create each chart
for i, column in enumerate(columns_to_plot):
    # panels are filled row by row; look the panel up once so the bars, the
    # title and the tick labels are guaranteed to land on the same axes
    panel = ax[i // plot_columns, i % plot_columns]

    # split the title into four-word long lines
    # (the comprehension variable must not reuse `i`: on Python 2 it would
    # overwrite the loop index)
    words = str(column).split(' ')
    title = '\n'.join(
        ' '.join(words[start:start + 4]) for start in range(0, len(words), 4)
    )

    # count occurrences of each distinct value, ordered by the value itself
    counts = df[column].value_counts().sort_index()

    # create integer list of x positions. Can't plot string values
    x_positions = list(range(len(counts)))

    # we only want about 10 ticks on the x_axis so calculate the tick interval
    x_label_step = max(1, -(-len(x_positions) // 10))

    # plot the data; align='edge' makes the label offset below valid
    # regardless of the library's default bar alignment
    panel.bar(x_positions, counts.values, width=bar_width, align='edge')
    panel.set_title(title)

    # use the original index data (e.g. including strings) as labels on the
    # x_axis, centred under each bar. The spacing between positions is always
    # 1, so the offset is a constant and also works for a single bar.
    tick_positions = [x + bar_width / 2 for x in x_positions[::x_label_step]]
    panel.set_xticks(tick_positions)
    panel.set_xticklabels(counts.index[::x_label_step], rotation=90)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment