Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mhawksey/105a4f6e5e2dcdba69b8c5e2868da373 to your computer and use it in GitHub Desktop.
Save mhawksey/105a4f6e5e2dcdba69b8c5e2868da373 to your computer and use it in GitHub Desktop.
Accumulated enrollment and daily growth of FutureLearn data
# Extension of https://github.com/psychemedia/futurelearnStatsSketches/blob/master/notebooks/FutureLearn%20Stats%20Recipes.ipynb
def plot_cumulativeCount(df, group, groupset, index, title, start, end):
    """Plot the running total of enrollments and the per-day count on twin y-axes.

    The left axis shows the accumulated number of rows over time (fuchsia
    line); the right axis shows how many rows fall on each calendar day.
    The span between the module-level COURSE_START_DATE and COURSE_END_DATE
    is shaded grey.

    Relies on names defined elsewhere in the file/notebook: ``plt``, ``pd``,
    ``mpatches``, ``mlines``, ``tkr``, ``date_limiter``, ``align_axis``,
    ``COURSE_START_DATE`` and ``COURSE_END_DATE``.

    df: source frame; it is filtered and re-indexed on a derived frame,
        the caller's object is not modified
    group: column whose values are matched against ``groupset``
    groupset: collection of values of ``group`` to keep
    index: column to use as the time index (its ``.date`` is used for the
        daily grouping, so it is assumed to hold datetimes)
    title: title of the chart
    start, end: limits handed to ``date_limiter`` (semantics defined there)
    """
    plt.rc("figure", figsize=(15, 10))

    # Keep only the requested group members and order the rows by time.
    df = df[df[group].isin(groupset)]
    df = df.reset_index().set_index(index)
    df.sort_index(inplace=True)
    # Copy so that adding a column below never writes into a slice of
    # another frame (avoids pandas' SettingWithCopy behaviour).
    df = date_limiter(df, start, end).copy()

    # Running total: the n-th row in time order is the n-th enrollment, so
    # the count starts at 1 (starting at 0 under-reports every point by one).
    df['Total enrollments'] = range(1, len(df) + 1)

    ax = df['Total enrollments'].plot(title=title, color='fuchsia')
    ax2 = ax.twinx()
    # Draw on the twin axis explicitly instead of relying on it being the
    # "current" axes, and keep the handle returned by twinx().
    df.groupby(df.index.date).size().plot(ax=ax2)

    ax.axvspan(pd.to_datetime(COURSE_START_DATE), pd.to_datetime(COURSE_END_DATE),
               color='grey', alpha=0.4, lw=0)

    # Hand-built legend entries so both axes share a single legend box.
    grey_patch = mpatches.Patch(color='grey', alpha=0.4, label='Course running')
    acum_line = mlines.Line2D([], [], color='fuchsia', label='Total enrollments')
    day_line = mlines.Line2D([], [], label='Enrollments by day')
    ax2.legend(loc=2, prop={'size': 13}, handles=[grey_patch, acum_line, day_line])

    ax.set_xlabel('Date')
    ax.set_ylabel('Accumulated Enrollments')
    ax2.set_ylabel('Daily Enrollments')

    # Thousands separators on both y-axes; each axis gets its own formatter
    # instance because a formatter is bound to the axis it is installed on.
    for axes in (ax, ax2):
        axes.get_yaxis().set_major_formatter(
            tkr.FuncFormatter(lambda x, p: format(int(x), ',')))

    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label, ax2.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels() + ax2.get_yticklabels()):
        item.set_fontsize(13)

    # NOTE(review): grid(None) toggles the grid state rather than forcing it
    # off; kept as in the original since the active style is not visible here.
    ax2.grid(None)
    align_axis(ax, ax2)
# http://stackoverflow.com/a/28527815/1027723
def align_axis(ax1, ax2, step=1):
    """Set both axes to have the same number of y gridlines.

    The axis with fewer gridlines has its upper y-limit raised so that, at
    its current gridline spacing, it spans as many intervals as the other
    axis.

    ax1: left axis
    ax2: right axis
    step: accepted for backward compatibility only. The new boundary is now
        computed directly instead of being searched for on a grid of
        candidate values, so this value no longer influences the result.

    Returns whatever ``set_ylim`` returns for the adjusted axis, or None when
    nothing is changed (equal gridline counts, or fewer than two gridlines
    on the sparser axis so no spacing can be derived).
    """
    ax1.set_aspect('auto')
    ax2.set_aspect('auto')

    grid_l = len(ax1.get_ygridlines())  # N of gridlines for left axis
    grid_r = len(ax2.get_ygridlines())  # N of gridlines for right axis
    if grid_l == grid_r:
        return None

    # Adjust the axis with the smaller N of gridlines towards the larger N.
    if grid_l < grid_r:
        target_ax, grid_small, grid_m = ax1, grid_l, grid_r
    else:
        target_ax, grid_small, grid_m = ax2, grid_r, grid_l

    # A single gridline defines no interval; avoid dividing by zero.
    if grid_small < 2:
        return None

    y_min, y_max = target_ax.get_ybound()  # Current boundaries
    parts = (y_max - y_min) / (grid_small - 1)  # Size of one partition

    # The boundary sought satisfies (y_new - y_min) / parts == grid_m - 1.
    # Solving it directly always yields exactly one scalar, whereas scanning
    # candidate values could match none (non-integer or non-positive bounds)
    # or several (large spacing), handing set_ylim an unusable array.
    y_new = y_min + parts * (grid_m - 1)

    return target_ax.set_ylim(top=y_new, emit=True, auto=True)
# Draw the enrolment chart for the selected learners, from the opening of
# registration up to the course end date offset by 7 via offsetDays
# (presumably seven days past the end — confirm against offsetDays).
plot_cumulativeCount(
    df=enrolments,
    group='learner_id',
    groupset=enrolled_learners,
    index='enrolled_at',
    title='{} enrolment accumulation'.format(COURSE_SHORTNAME),
    start=COURSE_OPEN_REGISTRATION,
    end=offsetDays(COURSE_END_DATE, 7),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment