Skip to content

Instantly share code, notes, and snippets.

@joshuata
Last active August 10, 2016 06:09
Show Gist options
  • Save joshuata/d5f0f5b5ca8114df636868a9080d7012 to your computer and use it in GitHub Desktop.
Save joshuata/d5f0f5b5ca8114df636868a9080d7012 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import re
# Regex matcher for lettered assessment columns such as "Assessment #03b".
# Group 1 captures the base column name ("Assessment #03").
matcher = re.compile(r'(Assessment #[0-9]+)[a-z]')

# Extracts the assessment number so columns can be ordered numerically.
numbered = re.compile(r'Assessment #([0-9]+)')


def merge_lettered_columns(df):
    """Fold every lettered assessment column into its base column.

    For each column whose name matches ``matcher`` (e.g. "Assessment #03b"),
    the base column ("Assessment #03") is set to the row-wise maximum of the
    base and the lettered column, and the lettered column is removed. When
    the base column does not exist, the lettered column is renamed to the
    base name instead of raising ``KeyError``.

    The frame is modified in place and also returned for convenience.
    """
    # Iterate over a snapshot: columns are deleted/renamed inside the loop.
    for col in list(df.columns):
        grouped = matcher.match(col)
        if not grouped:
            continue
        parent = grouped.group(1)
        if parent in df.columns:
            # Keep the better of the two scores, then drop the extra column.
            df[parent] = df[[parent, col]].max(axis=1)
            del df[col]
        else:
            # No base column to compare against: the lettered column is the
            # only score available, so it takes the base column's name.
            df.rename(columns={col: parent}, inplace=True)
    return df


def assessment_sort_key(name):
    """Return a sort key ordering assessment columns by their number.

    Numbered columns sort first, by integer value (so "#2" precedes "#10");
    the name itself breaks ties. Columns without a number sort afterwards,
    alphabetically.
    """
    found = numbered.match(name)
    if found:
        return (0, int(found.group(1)), name)
    return (1, 0, name)


def order_columns(df):
    """Return ``df`` with non-assessment columns first, then assessments.

    Non-assessment columns keep their existing relative order; columns whose
    name starts with "Assessment" follow, sorted with ``assessment_sort_key``.
    """
    cols = list(df.columns)
    assignments = sorted(
        (val for val in cols if val.startswith('Assessment')),
        key=assessment_sort_key,
    )
    student = [val for val in cols if not val.startswith('Assessment')]
    return df[student + assignments]


def main():
    """Read gradebook.csv, merge lettered columns, write gradebook_fixed.csv."""
    # Read csv file in to dataframe, set '-' to be empty
    df = pd.read_csv('gradebook.csv', na_values=['-'])
    # Fill empty entries with 0
    df = df.fillna(0.0)
    df = merge_lettered_columns(df)
    # Sorts columns to be in order
    df = order_columns(df)
    # Write dataframe to file
    df.to_csv('gradebook_fixed.csv', index=False)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment