Created
May 5, 2016 21:46
-
-
Save mhawksey/105a4f6e5e2dcdba69b8c5e2868da373 to your computer and use it in GitHub Desktop.
Accumulated enrollment and daily growth of FutureLearn data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extension of https://github.com/psychemedia/futurelearnStatsSketches/blob/master/notebooks/FutureLearn%20Stats%20Recipes.ipynb
def plot_cumulativeCount(df, group, groupset, index, title, start, end):
    """Plot accumulated and per-day enrolment counts on twin y-axes.

    Rows of ``df`` whose ``group`` value is in ``groupset`` are re-indexed by
    the ``index`` column, sorted, and passed through
    ``date_limiter(df, start, end)``. The running row count is drawn on the
    left axis and the number of rows per calendar day on the right axis. The
    span between ``COURSE_START_DATE`` and ``COURSE_END_DATE`` is shaded grey.

    Args:
        df: DataFrame of records (presumably one row per enrolment --
            confirm against the caller).
        group: Name of the column to filter on.
        groupset: Values of ``group`` to keep.
        index: Name of the column to use as the index. It must produce a
            datetime-like index because ``.index.date`` is read.
        title: Plot title.
        start: Lower limit handed to ``date_limiter``.
        end: Upper limit handed to ``date_limiter``.

    Returns:
        None. The figure is drawn through matplotlib as a side effect.
    """
    # NOTE: this changes matplotlib's global default figure size.
    plt.rc("figure", figsize=(15, 10))

    selected = df[df[group].isin(groupset)]
    selected = selected.reset_index().set_index(index)
    selected = selected.sort_index()
    selected = date_limiter(selected, start, end)

    # The running total starts at 1 (the first row counts as one enrolment),
    # so the last point equals the number of rows. Building a standalone
    # Series also avoids assigning a column onto a possibly sliced frame.
    running_total = pd.Series(
        range(1, len(selected) + 1),
        index=selected.index,
        name='Total enrollments',
    )
    ax = running_total.plot(title=title, color='fuchsia')

    # Draw the daily counts explicitly on the twin axis rather than relying
    # on it happening to be pyplot's current axes.
    ax2 = ax.twinx()
    selected.groupby(selected.index.date).size().plot(ax=ax2)

    ax.axvspan(pd.to_datetime(COURSE_START_DATE), pd.to_datetime(COURSE_END_DATE),
               color='grey', alpha=0.4, lw=0)

    # Proxy artists so one legend can describe elements of both axes.
    grey_patch = mpatches.Patch(color='grey', alpha=0.4, label='Course running')
    acum_line = mlines.Line2D([], [], color='fuchsia', label='Total enrollments')
    day_line = mlines.Line2D([], [], label='Enrollments by day')
    ax2.legend(loc=2, prop={'size': 13}, handles=[grey_patch, acum_line, day_line])

    ax.set_xlabel('Date')
    ax.set_ylabel('Accumulated Enrollments')
    ax2.set_ylabel('Daily Enrollments')

    def _thousands(value, _pos):
        # Tick label with a thousands separator, e.g. 12,345.
        return format(int(value), ',')

    # Each axis gets its own formatter instance; a formatter is bound to the
    # axis it is installed on.
    ax.get_yaxis().set_major_formatter(tkr.FuncFormatter(_thousands))
    ax2.get_yaxis().set_major_formatter(tkr.FuncFormatter(_thousands))

    labelled = ([ax.title, ax.xaxis.label, ax.yaxis.label, ax2.yaxis.label]
                + ax.get_xticklabels() + ax.get_yticklabels() + ax2.get_yticklabels())
    for item in labelled:
        item.set_fontsize(13)

    # grid(None) *toggles* visibility; False reliably hides the second grid.
    ax2.grid(False)
    align_axis(ax, ax2)
# http://stackoverflow.com/a/28527815/1027723
def align_axis(ax1, ax2, step=1):
    """Give both y-axes the same number of gridlines.

    The axis with fewer gridlines has its upper y-limit raised so that, at
    its current gridline spacing, it spans as many intervals as the other
    axis does.

    Args:
        ax1: left axis
        ax2: right axis
        step: spacing of the candidate upper bounds that are tried, as in
            np.arange([start,] stop[, step]); defaults to 1

    Returns:
        The result of ``set_ylim`` on the adjusted axis, or None when the
        axes already have the same number of gridlines or no candidate
        bound produces the target number of intervals.
    """
    ax1.set_aspect('auto')
    ax2.set_aspect('auto')

    grid_l = len(ax1.get_ygridlines())  # N of gridlines for left axis
    grid_r = len(ax2.get_ygridlines())  # N of gridlines for right axis
    if grid_l == grid_r:
        return None

    # N gridlines delimit N - 1 intervals; aim for the larger count.
    target_parts = max(grid_l, grid_r) - 1

    # Adjust whichever axis currently has fewer gridlines.
    if grid_l < grid_r:
        axis, n_lines = ax1, grid_l
    else:
        axis, n_lines = ax2, grid_r

    # A single gridline defines no interval, so there is no spacing to extend.
    if n_lines < 2:
        return None

    y_min, y_max = axis.get_ybound()
    part_size = (y_max - y_min) / (n_lines - 1)  # height of one interval
    if part_size <= 0:
        return None

    # Candidate upper bounds and the number of intervals each would give.
    candidates = np.arange(y_max + 1, y_max * 2 + 1, step)
    parts_new = (candidates - y_min) / part_size
    matches = candidates[np.isclose(parts_new, target_parts)]
    if matches.size == 0:
        return None

    # set_ylim expects a scalar; use the first matching candidate.
    return axis.set_ylim(top=float(matches[0]), emit=True, auto=True)
# Draw the enrolment accumulation chart for the selected learners. The window
# runs from COURSE_OPEN_REGISTRATION to COURSE_END_DATE offset by 7 (via offsetDays).
plot_cumulativeCount(
    df=enrolments,
    group='learner_id',
    groupset=enrolled_learners,
    index='enrolled_at',
    title='{} enrolment accumulation'.format(COURSE_SHORTNAME),
    start=COURSE_OPEN_REGISTRATION,
    end=offsetDays(COURSE_END_DATE, 7),
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment