Skip to content

Instantly share code, notes, and snippets.

@jin-zhe
Last active September 7, 2022 07:15
Show Gist options
  • Save jin-zhe/9a4494b5fb6424ec6f678aedb28a42b4 to your computer and use it in GitHub Desktop.
Save jin-zhe/9a4494b5fb6424ec6f678aedb28a42b4 to your computer and use it in GitHub Desktop.
Script for EduRec exports
from datetime import datetime
import os
import pandas as pd
import argparse
'''
Note:
- Entries start on row 3 of EduRec excel exports
- 'Student Number' column is mandatory!
'''
STU_ROST_PREFIX = 'student-roster-Export-'
TUT_GRPS_PREFIX = 'Tutorial Groups (EduRec)-Export-'
def sort_exports(prefix, dir, date_fmt='%d%m%y-%H%M', ext='.xlsx'):
prefix_len = len(prefix)
date_fmt_len = len(date_fmt)
def parse_date(filename):
return datetime.strptime(filename[prefix_len:prefix_len+date_fmt_len], date_fmt)
exports = [f for f in os.listdir(dir) if f.startswith(prefix) and f.endswith(ext)]
exports = [(f,parse_date(f)) for f in exports]
return [f[0] for f in sorted(exports, key=lambda x: x[1])]
def get_diff(prefix, dir, compare_key='Student Number'):
prev_export, curr_export = sort_exports(prefix, dir)[-2:]
prev = pd.read_excel(prev_export, skiprows=range(2))
curr = pd.read_excel(curr_export, skiprows=range(2))
diff_df = pd.merge(prev, curr, on=compare_key, how='outer', indicator='Exist')
diff_df = diff_df.loc[diff_df['Exist'] != 'both']
return diff_df
def generate_mastersheet(dir, output_filename='master_sheet.csv'):
stu_roster = pd.read_excel(sort_exports(STU_ROST_PREFIX, dir)[-1], skiprows=range(2))
tut_roster = pd.read_excel(sort_exports(TUT_GRPS_PREFIX, dir)[-1], skiprows=range(2))
tut_roster['Group'] = tut_roster.apply(lambda row : row['Group'][:3], axis=1)
master_sheet = pd.merge(stu_roster, tut_roster, how="left", on=["Name","User ID"])
master_sheet.fillna('NONE', inplace=True)
master_sheet.columns = map(str.upper, master_sheet.columns)
master_sheet.rename(columns={"GROUP": "TUT", "STUDENT NUMBER": "STU ID", "USER ID": "NUSNET ID"}, inplace=True)
print('Lost souls:')
print(master_sheet.loc[master_sheet['TUT'] == 'NONE'])
master_sheet.to_csv(dir+'/'+output_filename)
def check_tutorials(dir):
print('Tutorial updates:')
print(get_diff(TUT_GRPS_PREFIX, dir))
def check_drops(dir):
def format_row(row):
if row['Exist'] == 'left_only':
row['Exist'] = 'Dropped'
elif row['Exist'] == 'right_only':
row['Exist'] = 'Joined'
return row
diff_df = get_diff(STU_ROST_PREFIX, dir)
diff_df = diff_df.apply(lambda row : format_row(row), axis=1)
print("Dropped students:")
print(diff_df)
def parse_args():
parser = argparse.ArgumentParser(description="")
parser.add_argument('--dir', '-d', default='.', type=str, help='Directory where exported rosters are located.')
parser.add_argument('--generate_mastersheet', '-gm', action='store_true', help='Generate mastersheet for student-tutorial allocations.')
parser.add_argument('--check_tutorials', '-ct', action='store_true', help='Check for changes/movements in tutorial allocations.')
parser.add_argument('--check_drops', '-cd', action='store_true', help='Check for student drops from module.')
return parser.parse_args()
def main():
args = parse_args()
if args.generate_mastersheet:
generate_mastersheet(args.dir)
print()
if args.check_tutorials:
check_tutorials(args.dir)
print()
if args.check_drops:
check_drops(args.dir)
print()
if __name__ == "__main__": main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment