Skip to content

Instantly share code, notes, and snippets.

@wskinner
Last active December 14, 2015 22:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wskinner/5160089 to your computer and use it in GitHub Desktop.
Save wskinner/5160089 to your computer and use it in GitHub Desktop.
A script to calculate grade statistics for UC Berkeley Computer Science courses after removing scores of 0. This gives a more accurate representation of average score and spread.
# Run like this:
# glookup -s $assignment_name -b 0.1 | python betterstats.py
from sys import stdin
import re
def getBuckets():
    """Parse a score histogram from stdin and print statistics for it.

    Each input line is checked against two patterns:

    * a "Your score: <float>" line, whose value is remembered (the last
      such line wins; -1 is used when none is seen);
    * a histogram line of the form "<low> - <high>: <count>", which becomes
      a (low, high, count) tuple of floats.

    Histogram lines whose lower bound is not greater than 0.0 are dropped,
    which is how scores of 0 are excluded. The surviving buckets and the
    remembered score are handed to getStats().
    """
    histogram_re = re.compile(
        r'\s*([0-9]+\.[0-9]+)\s+\-\s*([0-9]+\.[0-9]+)\:\s*([0-9]+).*')
    own_score_re = re.compile(r'Your score\:.*?([0-9]+\.[0-9]+)')

    own_score = -1
    kept = []
    for text in stdin:
        score_hit = own_score_re.search(text)
        if score_hit is not None:
            own_score = float(score_hit.group(1))

        bucket_hit = histogram_re.search(text)
        if bucket_hit is None:
            continue
        low, high, count = (float(field) for field in bucket_hit.groups())
        # Skip the bucket that starts at 0 so zero scores do not skew results.
        if low > 0.0:
            kept.append((low, high, count))

    getStats(kept, own_score)
def getStats(buckets, your_score):
    """Print summary statistics for a bucketed score histogram.

    Every score inside a bucket is treated as equal to that bucket's lower
    bound, so the mean, standard deviation, quartiles, minimum and maximum
    are all expressed in terms of bucket lower bounds.

    Args:
        buckets: sequence of (lower_bound, upper_bound, count) tuples in
            ascending score order (minimum is taken from the first non-empty
            bucket and maximum from the last one).
        your_score: the caller's own score, used only for the rank line.

    Returns:
        None. The report is written to stdout. If there are no scores at
        all, a short notice is printed instead of the report.
    """
    total_scores = float(sum(bucket[2] for bucket in buckets))
    if total_scores <= 0:
        # Nothing to average; avoid dividing by zero below.
        print("No scores to summarize.")
        return

    total_points = float(sum(bucket[0] * bucket[2] for bucket in buckets))
    mean = total_points / total_scores

    # Cumulative counts at which each quartile is reached.
    quartiles = [int(total_scores * q) for q in (.25, .5, .75)]
    quartile_scores = []
    minimum = -1
    maximum = -1
    sofar = 0
    sum_square_diffs = 0
    # Ends up as the number of scores in buckets whose lower bound is above
    # your_score, so a score in the top bucket is reported as #0.
    # NOTE(review): confirm whether a 1-based rank was intended.
    your_rank = total_scores

    for bucket in buckets:
        low, count = bucket[0], bucket[2]
        if count <= 0:
            # Empty buckets contribute nothing to any statistic.
            continue
        if your_score >= low:
            your_rank -= count
        if minimum < 0:
            minimum = low
        maximum = low
        sofar += count
        # A single bucket may cross several quartile thresholds at once, so
        # keep consuming thresholds until this bucket no longer covers one.
        while quartiles and sofar >= quartiles[0]:
            quartiles.pop(0)
            quartile_scores.append(low)
        # Deviation is measured from the same representative value (the
        # lower bound) that was used to compute the mean.
        sum_square_diffs += count * (mean - low) ** 2

    standard_deviation = (sum_square_diffs / total_scores) ** .5

    results = (
        "\n"
        "With scores of 0 removed:\n"
        "-------------------------\n"
        "Your Score: %f (#%d of %d)\n"
        "Mean: %f\n"
        "Standard Deviation: %f\n"
        "1st Quartile: %f\n"
        "2nd Quartile: %f\n"
        "3rd Quartile: %f\n"
        "Minimum: %f\n"
        "Maximum: %f\n"
    ) % (your_score, your_rank, total_scores, mean, standard_deviation,
         quartile_scores[0], quartile_scores[1], quartile_scores[2],
         minimum, maximum)
    # Parenthesized form prints the same text under Python 2 and Python 3.
    print(results)
# Script entry point: read the histogram from stdin only when run directly.
if __name__ == "__main__":
    getBuckets()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment