Skip to content

Instantly share code, notes, and snippets.

@DavidCain
Created February 24, 2017 00:45
Show Gist options
  • Save DavidCain/cbfe7f13a450867d4aa79401f7fd7af8 to your computer and use it in GitHub Desktop.
Save DavidCain/cbfe7f13a450867d4aa79401f7fd7af8 to your computer and use it in GitHub Desktop.
Winter School participation by MIT affiliation and student status
"""
Gather affiliation statistics from this year's Winter School
"""
from collections import Counter, OrderedDict
import csv
from ws import models
from ws.utils import dates as dateutils
# Year component of the current time as reported by the project's date helper
this_year = dateutils.local_now().year
# Cutoff separating earlier seasons from the current one in the queries below.
# NOTE(review): presumably January 1 of the current year -- confirm against
# ws.utils.dates, including whether it is a date or a datetime.
jan_1 = dateutils.jan_1()
# Two-letter affiliation codes paired with the labels used in the report.
# Insertion order matters: generate_stats() derives the CSV column order
# from the values of this mapping.
affiliation_mapper = OrderedDict()
affiliation_mapper['MU'] = 'MIT undergrad'
affiliation_mapper['NU'] = 'Non-MIT undergrad'
affiliation_mapper['MG'] = 'MIT grad student'
affiliation_mapper['NG'] = 'Non-MIT grad student'
affiliation_mapper['MA'] = 'MIT affiliate'
affiliation_mapper['NA'] = 'Non-affiliate'
# Among Winter School signups for trips dated on or before jan_1 where the
# person was actually on the trip, pick the one whose participant has the
# highest primary key.
last_signup = (
    models.SignUp.objects
    .filter(
        on_trip=True,
        trip__activity='winter_school',
        trip__trip_date__lte=jan_1,
    )
    .latest('participant__pk')
)

# We don't store when people sign up (and last year's lecture attendance
# recording was sloppy), so account age stands in for "returning": anyone
# whose account was created no later than last year's final active
# participant is inferred to be a returning participant.
last_participant_pk = last_signup.participant.pk
# Participants with at least one Winter School signup created on or after
# jan_1 that is flagged on_trip. All criteria stay in a single filter() call
# so they must hold for the same signup; distinct() then collapses the
# repeated rows produced for people with several matching signups.
ws_participants = (
    models.Participant.objects
    .filter(
        signup__on_trip=True,
        signup__trip__activity='winter_school',
        signup__time_created__gte=jan_1,
    )
    .distinct()
)
# The groups reported on: each entry is (Participant queryset, display label)
cohorts = [
    # Attendance recorded for this year's lectures
    (models.Participant.objects.filter(lectureattendance__year=this_year),
     "Lecture attendants"),

    # Holders of an active Winter School leader rating
    (models.Participant.objects.filter(leaderrating__activity='winter_school',
                                       leaderrating__active=True),
     "WS leaders"),

    # Led at least one Winter School trip dated on or after jan_1
    (models.Participant.objects.filter(trips_led__activity='winter_school',
                                       trips_led__trip_date__gte=jan_1).distinct(),
     "WS leaders who led a trip"),

    # Everyone who went on a Winter School trip this season
    (ws_participants, 'Trip participants'),

    # Split of the above by account age relative to last season's cutoff
    (ws_participants.filter(pk__lte=last_participant_pk), 'Returning participants'),
    (ws_participants.filter(pk__gt=last_participant_pk), 'First-year participants'),
]
def generate_stats(csv_file):
    """Report per-cohort affiliation counts on stdout and as CSV rows.

    For every (queryset, label) pair in the module-level ``cohorts`` list, a
    heading with the cohort size is printed, followed by one line per
    affiliation in descending order of frequency. The same counts are
    written to ``csv_file`` as one row per cohort.

    Args:
        csv_file: Writable text file object that receives the CSV output.
            The header is 'Category' followed by the labels of
            ``affiliation_mapper`` in mapping order; affiliations absent
            from a cohort are written as 0.

    Raises:
        KeyError: If a participant's affiliation code has no entry in
            ``affiliation_mapper``.
    """
    columns = ['Category']
    columns.extend(affiliation_mapper.values())
    writer = csv.DictWriter(csv_file, fieldnames=columns, restval=0)
    writer.writeheader()

    for participants, cohort_name in cohorts:
        heading = '{} ({})'.format(cohort_name, participants.count())
        print(heading)
        print('-' * len(heading))

        # Tally in Python rather than with Django/SQL aggregation:
        # annotating on 'affiliation' would produce duplicates, and
        # aggregation over 'DISTINCT ON participant.pk' is not supported.
        tally = Counter(participants.values_list('affiliation', flat=True))

        record = {'Category': cohort_name}
        for code, total in tally.most_common():
            name = affiliation_mapper[code]
            print("{}: {}".format(name, total))
            record[name] = total

        print('')
        writer.writerow(record)
if __name__ == '__main__':
    # Writes the report into the current working directory, replacing any
    # existing file of the same name.
    # NOTE(review): on Python 3 the csv docs recommend opening the file with
    # newline='' to avoid doubled line endings on some platforms -- apply
    # once the target Python version is confirmed.
    with open('affiliation_stats.csv', 'w') as report_file:
        generate_stats(report_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment