Skip to content

Instantly share code, notes, and snippets.

@MarkBaggett
Created March 26, 2023 21:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MarkBaggett/411427eb965e28a99df700adbc853aab to your computer and use it in GitHub Desktop.
Save MarkBaggett/411427eb965e28a99df700adbc853aab to your computer and use it in GitHub Desktop.
Calculate Stats on student lab completions
import numpy as np
import pandas as pd
def objective_to_day(ct):
    """Return the day number encoded in a challenge title.

    The first ten characters of ``ct`` are discarded (the length of the
    ``"Objective "`` prefix used by titles such as ``"Objective 3.2.1"``),
    the remainder is split on ``"."`` and the first field is converted
    to ``int``.
    """
    fields = ct[10:].split(".")
    return int(fields[0])
def objective_to_question(ct):
    """Return the question number encoded in a challenge title.

    The first ten characters of ``ct`` are discarded (the length of the
    ``"Objective "`` prefix used by titles such as ``"Objective 3.2.1"``),
    the remainder is split on ``"."`` and the second field is converted
    to ``int``.
    """
    fields = ct[10:].split(".")
    return int(fields[1])
def objective_to_step(ct):
    """Return the step number encoded in a challenge title.

    The first ten characters of ``ct`` are discarded (the length of the
    ``"Objective "`` prefix used by titles such as ``"Objective 3.2.1"``),
    the remainder is split on ``"."`` and the third field is converted
    to ``int``.
    """
    fields = ct[10:].split(".")
    return int(fields[2])
# Read the raw attempt log.
data = pd.read_csv("attempts.csv")

# Remove unused columns.
data = data.drop(
    [
        'user_reference',
        'attempt_status',
        'points_awarded',
        'failure_reason',
        'points_available',
        'difficulty',
        'submitted_flag',
        'submitted_flag_hash',
    ],
    axis=1,
)

# Convert the attempt timestamp from string into a datetime.
data['attempted_at'] = pd.to_datetime(data['attempted_at'])

# "Objective 0" has no question/step parts; pad it so it parses like the
# other titles.
data.loc[data['challenge_title'] == "Objective 0", "challenge_title"] = "Objective 0.0.0"

# Break the objective number down into its parts.
data["DAY"] = data.challenge_title.apply(objective_to_day)
data["QUESTION"] = data.challenge_title.apply(objective_to_question)
data["STEP"] = data.challenge_title.apply(objective_to_step)

# Sort so that each student's attempts are in chronological order.
data = data.sort_values(["display_name", "attempted_at"])

# Start a new run id whenever STEP does not increase by exactly 1 relative
# to the previous row. The diff runs over the whole frame; runs are kept
# apart per student/day/question by the groupby keys below.
data['STEP_UP'] = (data['STEP'].diff() != 1).cumsum()

# First and last attempt timestamp of every run, attached back to each row.
agg_dict = {'attempted_at': ['first', 'last']}
new_cols = data.groupby(['display_name', 'DAY', "QUESTION", "STEP_UP"]).agg(agg_dict)
new_cols.columns = new_cols.columns.map('_'.join)
new_cols = new_cols.reset_index()
data = data.merge(
    new_cols[['display_name', 'DAY', 'QUESTION', "STEP_UP", 'attempted_at_first', 'attempted_at_last']],
    on=['display_name', 'DAY', 'QUESTION', 'STEP_UP'],
)

# Duration of each run in seconds (vectorized instead of a per-row apply).
data["ELAPSED_TIME"] = (data['attempted_at_last'] - data['attempted_at_first']).dt.total_seconds()

# Sort the data again after the merge.
data = data.sort_values(["display_name", "attempted_at"])

for day in data['DAY'].unique():
    for q in data['QUESTION'].unique():
        question = data[(data['DAY'] == day) & (data['QUESTION'] == q) & (data['STEP'] == 1)]
        # Not every day/question combination exists; skip the empty ones
        # before doing any statistics on them.
        if question.empty:
            continue

        elapsed = question['ELAPSED_TIME']
        # Cutoff values for the 10th and 90th percentiles; values outside
        # this band are excluded from the summary statistics.
        cutoff_low = elapsed.quantile(0.10)
        cutoff_high = elapsed.quantile(0.90)
        trimmed = elapsed[(elapsed >= cutoff_low) & (elapsed <= cutoff_high)]

        trimmed_mean = trimmed.mean()
        trimmed_std = trimmed.std()
        # Proposed lab length: trimmed mean plus two standard deviations.
        labtime = trimmed_mean + (trimmed_std * 2)
        # Number of (untrimmed) rows whose elapsed time exceeds the proposal.
        miss = int((elapsed > labtime).sum())

        print(f"Day {day} Question {q} TIME IN MINUTES:")
        print(f" Mean = {trimmed_mean/60:03.2f}")
        print(f" Min = {trimmed.min()/60:03.2f}")
        print(f" Max = {trimmed.max()/60:03.2f}")
        print(f" STDDEV = {trimmed_std/60:03.2f}")
        print(f" If you run the lab {labtime/60:03.2f} you would miss {miss} of {len(question)} students. ")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment