Last active
August 10, 2016 06:09
-
-
Save joshuata/d5f0f5b5ca8114df636868a9080d7012 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import re | |
# Matches a retake column such as "Assessment #3b"; group 1 is the name of
# the base column it belongs to ("Assessment #3").
RETAKE_PATTERN = re.compile(r'(Assessment #[0-9]+)[a-z]')

# Captures the assessment number so columns can be ordered numerically.
NUMBER_PATTERN = re.compile(r'Assessment #([0-9]+)')


def merge_retakes(df):
    """Fold every retake column into its base assessment column.

    For each column whose name matches ``RETAKE_PATTERN`` the base column
    becomes the row-wise maximum of itself and the retake, and the retake
    column is removed. When the base column does not exist, the retake is
    promoted to the base name instead of raising ``KeyError``.

    The frame is modified in place and also returned for convenience.
    """
    # Snapshot the names first: columns are added/removed inside the loop.
    for col in list(df.columns):
        grouped = RETAKE_PATTERN.match(col)
        if not grouped:
            continue
        parent = grouped.group(1)
        if parent in df.columns:
            # Keep the better of the original attempt and the retake.
            df[parent] = df[[parent, col]].max(axis=1)
        else:
            # Orphan retake: nothing to compare against, so it is the grade.
            df[parent] = df[col]
        del df[col]
    return df


def assessment_sort_key(name):
    """Return a sort key that orders "Assessment #N" columns by N.

    Numbered assessments sort numerically (so #2 precedes #10); any other
    name sorts after them, alphabetically.
    """
    found = NUMBER_PATTERN.match(name)
    if found:
        return (0, int(found.group(1)), name)
    return (1, 0, name)


def order_columns(columns):
    """Return the column names with assessment columns sorted and last.

    Columns that do not start with 'Assessment' keep their relative order
    and come first; the 'Assessment' columns follow, ordered by
    ``assessment_sort_key``.
    """
    columns = list(columns)
    others = [c for c in columns if not c.startswith('Assessment')]
    assessments = sorted(
        (c for c in columns if c.startswith('Assessment')),
        key=assessment_sort_key,
    )
    return others + assessments


def fix_gradebook(src='gradebook.csv', dest='gradebook_fixed.csv'):
    """Read the gradebook at *src*, clean it up, and write it to *dest*."""
    # Read csv file into a dataframe, treating '-' as an empty entry
    df = pd.read_csv(src, na_values=['-'])
    # Fill empty entries with 0
    df = df.fillna(0.0)
    # Collapse "Assessment #XX(b|c|...)" retakes into "Assessment #XX"
    df = merge_retakes(df)
    # Put the columns in order
    df = df[order_columns(df.columns)]
    # Write dataframe to file
    df.to_csv(dest, index=False)


if __name__ == '__main__':
    fix_gradebook()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment