DavidCain/affiliation_stats.py

## affiliation_stats.py
"""
Gather affiliation statistics from this year's Winter School
"""
from collections import Counter, OrderedDict
import csv

from ws import models
from ws.utils import dates as dateutils


# Reference dates used by every query in this script, both obtained from
# the project's date helpers.
# NOTE(review): jan_1() presumably returns January 1st of the current
# year -- confirm in ws.utils.dates.
jan_1 = dateutils.jan_1()
this_year = dateutils.local_now().year

# Affiliation code -> human-readable label. Insertion order is significant:
# it fixes the order of the affiliation columns in the generated CSV.
affiliation_mapper = OrderedDict()
affiliation_mapper['MU'] = "MIT undergrad"
affiliation_mapper['NU'] = "Non-MIT undergrad"
affiliation_mapper['MG'] = "MIT grad student"
affiliation_mapper['NG'] = "Non-MIT grad student"
affiliation_mapper['MA'] = 'MIT affiliate'
affiliation_mapper['NA'] = 'Non-affiliate'

# Winter School signups from last season that actually went on a trip
# (trips dated on or before `jan_1`)
_prior_ws_signups = models.SignUp.objects.filter(
    on_trip=True,
    trip__activity='winter_school',
    trip__trip_date__lte=jan_1,
)

# We don't store when people sign up (and last year's lecture recording was
# sloppy), so returning participants are inferred instead: take the signup
# whose participant has the highest pk, and treat anyone whose account was
# created no later than that participant's as a returning participant.
last_signup = _prior_ws_signups.latest('participant__pk')
last_participant_pk = last_signup.participant.pk

# Anyone placed on a Winter School trip through a signup created since `jan_1`
_current_ws_signup = {
    'signup__trip__activity': 'winter_school',
    'signup__on_trip': True,
    'signup__time_created__gte': jan_1,
}
ws_participants = (
    models.Participant.objects
    .filter(**_current_ws_signup)
    .distinct()
)

# The groups of Participant objects to report on. Each queryset is named
# first, then paired with the label used in the printed output and the CSV.
_participants = models.Participant.objects

# Has a lecture attendance record for the current year
_lecture_attendants = _participants.filter(lectureattendance__year=this_year)

# Holds an active Winter School leader rating
_rated_leaders = _participants.filter(
    leaderrating__active=True,
    leaderrating__activity='winter_school',
)

# Led a Winter School trip dated on or after `jan_1`
_leaders_with_trips = _participants.filter(
    trips_led__trip_date__gte=jan_1,
    trips_led__activity='winter_school',
).distinct()

# Split this year's trip participants around last season's highest pk
_returning = ws_participants.filter(pk__lte=last_participant_pk)
_first_years = ws_participants.filter(pk__gt=last_participant_pk)

cohorts = [
    (_lecture_attendants, "Lecture attendants"),
    (_rated_leaders, "WS leaders"),
    (_leaders_with_trips, "WS leaders who led a trip"),
    (ws_participants, 'Trip participants'),
    (_returning, 'Returning participants'),
    (_first_years, 'First-year participants'),
]


def generate_stats(csv_file):
    """ Print participant stats to stdout, and generate a CSV.

    For every ``(queryset, label)`` pair in the module-level ``cohorts``
    list, print a heading with the cohort size, then one line per
    affiliation (most common first), and write one CSV row for the cohort.

    Each participant is counted exactly once per cohort: rows are fetched
    as ``(pk, affiliation)`` pairs and collapsed by primary key, so the
    heading total always equals the sum of the per-affiliation counts.

    Args:
        csv_file: Writable file object that receives the CSV. The header is
            ``Category`` followed by the labels of ``affiliation_mapper``;
            affiliations absent from a cohort are written as ``0``.

    Raises:
        KeyError: If a participant's affiliation code is not a key of
            ``affiliation_mapper``.
    """
    header = ['Category'] + list(affiliation_mapper.values())
    csv_writer = csv.DictWriter(csv_file, fieldnames=header, restval=0)
    csv_writer.writeheader()

    # Print participation counts in sorted order, output to CSV
    for pars, label in cohorts:
        row = {'Category': label}

        # Count in Python, keyed by primary key, rather than selecting only
        # 'affiliation': on a .distinct() queryset that would apply DISTINCT
        # to the affiliation column (merging different participants), and on
        # a join-based queryset one participant may come back several times.
        # Including the pk keeps one entry per participant in both cases.
        affiliation_by_pk = dict(pars.values_list('pk', 'affiliation'))

        text_label = '{} ({})'.format(label, len(affiliation_by_pk))
        print(text_label)
        print('-' * len(text_label))

        counts = Counter(affiliation_by_pk.values())
        for abbrev, count in counts.most_common():
            affiliation_label = affiliation_mapper[abbrev]
            print("{}: {}".format(affiliation_label, count))
            row[affiliation_label] = count

        print('')
        csv_writer.writerow(row)


def _export_affiliation_csv(path='affiliation_stats.csv'):
    """ Open `path` for writing and hand the file to generate_stats(). """
    with open(path, 'w') as csv_file:
        generate_stats(csv_file)


if __name__ == '__main__':
    _export_affiliation_csv()
	"""
	Gather affiliation statistics from this year's Winter School
	"""
	from collections import Counter, OrderedDict
	import csv

	from ws import models
	from ws.utils import dates as dateutils


	# Reference dates used by every query in this script, both obtained from
	# the project's date helpers.
	# NOTE(review): jan_1() presumably returns January 1st of the current
	# year -- confirm in ws.utils.dates.
	jan_1 = dateutils.jan_1()
	this_year = dateutils.local_now().year

	# Affiliation code -> human-readable label. Insertion order is significant:
	# it fixes the order of the affiliation columns in the generated CSV.
	affiliation_mapper = OrderedDict()
	affiliation_mapper['MU'] = "MIT undergrad"
	affiliation_mapper['NU'] = "Non-MIT undergrad"
	affiliation_mapper['MG'] = "MIT grad student"
	affiliation_mapper['NG'] = "Non-MIT grad student"
	affiliation_mapper['MA'] = 'MIT affiliate'
	affiliation_mapper['NA'] = 'Non-affiliate'

	# Winter School signups from last season that actually went on a trip
	# (trips dated on or before `jan_1`)
	_prior_ws_signups = models.SignUp.objects.filter(
		on_trip=True,
		trip__activity='winter_school',
		trip__trip_date__lte=jan_1,
	)

	# We don't store when people sign up (and last year's lecture recording was
	# sloppy), so returning participants are inferred instead: take the signup
	# whose participant has the highest pk, and treat anyone whose account was
	# created no later than that participant's as a returning participant.
	last_signup = _prior_ws_signups.latest('participant__pk')
	last_participant_pk = last_signup.participant.pk

	# Anyone placed on a Winter School trip through a signup created since `jan_1`
	_current_ws_signup = {
		'signup__trip__activity': 'winter_school',
		'signup__on_trip': True,
		'signup__time_created__gte': jan_1,
	}
	ws_participants = (
		models.Participant.objects
		.filter(**_current_ws_signup)
		.distinct()
	)

	# The groups of Participant objects to report on. Each queryset is named
	# first, then paired with the label used in the printed output and the CSV.
	_participants = models.Participant.objects

	# Has a lecture attendance record for the current year
	_lecture_attendants = _participants.filter(lectureattendance__year=this_year)

	# Holds an active Winter School leader rating
	_rated_leaders = _participants.filter(
		leaderrating__active=True,
		leaderrating__activity='winter_school',
	)

	# Led a Winter School trip dated on or after `jan_1`
	_leaders_with_trips = _participants.filter(
		trips_led__trip_date__gte=jan_1,
		trips_led__activity='winter_school',
	).distinct()

	# Split this year's trip participants around last season's highest pk
	_returning = ws_participants.filter(pk__lte=last_participant_pk)
	_first_years = ws_participants.filter(pk__gt=last_participant_pk)

	cohorts = [
		(_lecture_attendants, "Lecture attendants"),
		(_rated_leaders, "WS leaders"),
		(_leaders_with_trips, "WS leaders who led a trip"),
		(ws_participants, 'Trip participants'),
		(_returning, 'Returning participants'),
		(_first_years, 'First-year participants'),
	]


	def generate_stats(csv_file):
		""" Print participant stats to stdout, and generate a CSV.

		For every ``(queryset, label)`` pair in the ``cohorts`` list, print a
		heading with the cohort size, then one line per affiliation (most
		common first), and write one CSV row for the cohort.

		Each participant is counted exactly once per cohort: rows are fetched
		as ``(pk, affiliation)`` pairs and collapsed by primary key, so the
		heading total always equals the sum of the per-affiliation counts.

		Args:
			csv_file: Writable file object that receives the CSV. The header
				is ``Category`` followed by the labels of
				``affiliation_mapper``; affiliations absent from a cohort are
				written as ``0``.

		Raises:
			KeyError: If a participant's affiliation code is not a key of
				``affiliation_mapper``.
		"""
		header = ['Category'] + list(affiliation_mapper.values())
		csv_writer = csv.DictWriter(csv_file, fieldnames=header, restval=0)
		csv_writer.writeheader()

		# Print participation counts in sorted order, output to CSV
		for pars, label in cohorts:
			row = {'Category': label}

			# Count in Python, keyed by primary key, rather than selecting only
			# 'affiliation': on a .distinct() queryset that would apply DISTINCT
			# to the affiliation column (merging different participants), and
			# on a join-based queryset one participant may come back several
			# times. Including the pk keeps one entry per participant.
			affiliation_by_pk = dict(pars.values_list('pk', 'affiliation'))

			text_label = '{} ({})'.format(label, len(affiliation_by_pk))
			print(text_label)
			print('-' * len(text_label))

			counts = Counter(affiliation_by_pk.values())
			for abbrev, count in counts.most_common():
				affiliation_label = affiliation_mapper[abbrev]
				print("{}: {}".format(affiliation_label, count))
				row[affiliation_label] = count

			print('')
			csv_writer.writerow(row)


	def _export_affiliation_csv(path='affiliation_stats.csv'):
		""" Open `path` for writing and hand the file to generate_stats(). """
		with open(path, 'w') as csv_file:
			generate_stats(csv_file)


	if __name__ == '__main__':
		_export_affiliation_csv()