Last active
August 29, 2015 14:08
-
-
Save luser/8f1b6c52a2e0d9869e72 to your computer and use it in GitHub Desktop.
Running group data mangler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Hello Mozilla runners! | |
Mozlandia is nearly upon us. Thanks for signing up for a running group! You've been placed in Group %(num)d, which has a distance of %(distance)s and an average pace of %(pace)s. This is all generated from a Python script, so consider it a helpful suggestion more than anything! | |
Your fellow runners in this group are all included in the To: line of this email. I'm hoping that you will be able to self-organize runs based on this, but if you need assistance, feel free to stop into irc.mozilla.org #running or email me directly. You can also use this etherpad I've created: https://etherpad.mozilla.org/portland-running-groups if you want to make your outings available to anyone. | |
Regards, | |
-Ted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import csv | |
import re | |
import sys | |
import urllib | |
from collections import defaultdict | |
from cluster import HierarchicalClustering, KMeansClustering | |
from itertools import count | |
from operator import attrgetter, itemgetter | |
# Kilometres in one mile; used to convert paces and distances between units.
KM_PER_MILE = 1.60934
class person(object):
    """A runner presented to KMeansClustering as a one-element vector.

    This is to work around KMeansClustering being silly for tuples that
    aren't vectors: the instance reports a length of 1 and exposes its pace
    as the single element, while still carrying the runner's email.
    """

    def __init__(self, email, pace):
        self.email = email
        self.pace = pace

    def __repr__(self):
        return 'person(%r, %r)' % (self.email, self.pace)

    def __len__(self):
        # Exactly one "dimension": the pace.
        return 1

    def __getitem__(self, x):
        # Only the single valid position yields the pace. Raising IndexError
        # for anything else is what ends for/list()/tuple() iteration under
        # the legacy sequence protocol; answering every index would make
        # iterating an instance loop forever.
        if x in (0, -1):
            return self.pace
        raise IndexError('person index out of range')
def person_distance(x, y):
    """Return the absolute difference between the first items of *x* and *y*."""
    first_x = x[0]
    first_y = y[0]
    return abs(first_x - first_y)
def normalize_time(t, units):
    """Convert a pace string into decimal minutes per mile.

    *t* must begin with ``MM`` or ``MM:SS``. Only that leading part is
    examined; any text following it is ignored. *units* states what the pace
    is measured per: ``'mi'`` leaves the value unchanged, ``'km'`` scales it
    by KM_PER_MILE to give the per-mile equivalent.

    Returns:
        The pace as a number of minutes (seconds become a fraction).

    Raises:
        RuntimeError: if *units* is neither ``'mi'`` nor ``'km'``, or if *t*
            does not start with digits.
    """
    if units not in ('mi', 'km'):
        raise RuntimeError('Unknown unit: %s' % units)

    # Raw string so that \d reaches the regex engine as written instead of
    # being treated as an (invalid) string escape.
    m = re.match(r'(\d+)(:(\d+))?', t)
    if m is None:
        raise RuntimeError("Can't parse time: %s" % t)

    mins = int(m.group(1))
    # The seconds part is optional; "9" means 9:00.
    secs = int(m.group(3)) if m.group(3) is not None else 0
    conversion = 1 if units == 'mi' else KM_PER_MILE
    return conversion * (mins + (secs / 60.0))
def fmt_pace(pace):
    """Format a pace in decimal minutes as a zero-padded ``MM:SS`` string.

    The pace is rounded to the nearest whole second before being split into
    minutes and seconds. Truncating the fractional minutes instead loses a
    second to floating-point error (8.1 minutes would print as ``08:05``
    rather than ``08:06``).
    """
    total_seconds = int(round(pace * 60))
    mins, secs = divmod(total_seconds, 60)
    return "%02d:%02d" % (mins, secs)
def nice_distance(distance):
    """Describe a distance in miles with its rounded kilometre equivalent."""
    kilometres = int(round(distance * KM_PER_MILE))
    return "%d mi (%d km)" % (distance, kilometres)
def nice_pace(pace):
    """Describe a per-mile pace in both min/mi and min/km."""
    per_mile = fmt_pace(pace)
    # A kilometre is shorter than a mile, so the per-km time is smaller.
    per_km = fmt_pace(pace / KM_PER_MILE)
    return "%s min/mi (%s min/km)" % (per_mile, per_km)
def make_groups(source):
    """Read sign-up rows from *source* and yield pace-clustered running groups.

    *source* names a CSV: a value starting with ``http`` is fetched with
    ``urllib.urlopen`` (the Python 2 API this file already uses); anything
    else is opened as a local file. The first row is skipped as a header.
    In each remaining row, column 1 is the distance (its first
    whitespace-separated token must be an integer), column 3 is stored as the
    runner's email, column 4 is the pace and column 5 the pace units.

    Runners are binned by distance, then each bin is split by pace with
    KMeansClustering. Group numbers start at 1 and increase across bins in
    ascending distance order and, within a bin, by ascending mean pace.

    Yields:
        ``(distance, groups)`` pairs. ``groups`` is a list of
        ``(number, mean_pace, members)`` tuples, with ``members`` sorted by
        ascending pace.

    Raises:
        RuntimeError: if a row has no pace or no units, or if
            ``normalize_time`` rejects them.
    """
    # Bin by distance, then group by pace per bin.
    bins = defaultdict(list)
    # Honour the argument rather than re-reading sys.argv, so the function
    # does what its signature says.
    if source.startswith('http'):
        f = urllib.urlopen(source)
    else:
        f = open(source, 'rb')
    try:
        for i, row in enumerate(csv.reader(f)):
            if i == 0:
                # Header row.
                continue
            distance, who, pace, units = row[1], row[3], row[4], row[5]
            # Both values are required; name the row so it can be fixed.
            if not (pace and units):
                raise RuntimeError('Missing sanitized pace/units in row %d' % i)
            distance = int(distance.split()[0])
            normpace = normalize_time(pace, units)
            bins[distance].append(person(who, normpace))
    finally:
        # Release the handle even when a bad row aborts the read.
        f.close()

    numbers = count(1)
    for distance in sorted(bins):
        data = bins[distance]
        if len(data) < 2:
            # A lone runner cannot be split into clusters; they form a
            # group of one.
            clusters = [data]
        else:
            cl = KMeansClustering(data, person_distance)
            # Try to get about 9 people per cluster.
            wanted = int(round(len(data) / 9.0)) if len(data) > 9 else 2
            clusters = cl.getclusters(wanted)
        # Pre-calculate mean so we can sort. Empty clusters are skipped so
        # the mean is always well defined.
        by_mean = sorted(
            ((sum(p.pace for p in g) / len(g), g) for g in clusters if g),
            key=itemgetter(0))
        # Build the list eagerly so group numbers are fixed here and do not
        # depend on when (or whether) the caller iterates the groups.
        yield distance, [
            (next(numbers), mean, sorted(members, key=attrgetter('pace')))
            for mean, members in by_mean
        ]
def main():
    """Print each running group and its members for the CSV named in argv[1]."""
    for distance, groups in make_groups(sys.argv[1]):
        for num, mean, members in groups:
            heading = "Group %d: %s, %s avg pace" % (
                num, nice_distance(distance), nice_pace(mean))
            print(heading)
            for runner in members:
                print("\t%s (%s)" % (runner.email, nice_pace(runner.pace)))
# Produce the report only when run as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import json | |
import os | |
import smtplib | |
import sys | |
from email.mime.text import MIMEText | |
from runninggroups import make_groups, nice_distance, nice_pace | |
def main():
    """Email every running group its number, distance and average pace.

    Reads ``email_template`` and ``config.json`` from this script's
    directory, logs in to the SMTP-over-SSL server named in the config, and
    sends one message per group produced by ``make_groups(sys.argv[1])``,
    addressed to all members of that group.

    The config must provide ``server``, ``port``, ``username``, ``password``,
    ``from`` (the From: header) and ``from_mail`` (the envelope sender).
    """
    here = os.path.dirname(__file__)
    # Context managers so both files are closed as soon as they are read.
    with open(os.path.join(here, 'email_template'), 'r') as template_file:
        email_template = template_file.read()
    with open(os.path.join(here, 'config.json'), 'r') as config_file:
        config = json.load(config_file)

    s = smtplib.SMTP_SSL(config['server'], config['port'])
    try:
        s.login(config['username'], config['password'])
        for distance, groups in make_groups(sys.argv[1]):
            for num, mean, members in groups:
                data = {
                    'num': num,
                    'distance': nice_distance(distance),
                    'pace': nice_pace(mean)
                }
                msg = MIMEText(email_template % data)
                msg['Subject'] = 'Your Mozlandia running group!'
                msg['From'] = config['from']
                to = [m.email for m in members]
                msg['To'] = ','.join(to)
                print("Sending group %d email to %d people" % (num, len(to)))
                s.sendmail(config['from_mail'], to, msg.as_string())
    finally:
        # Always end the session, including when a send fails part-way.
        try:
            s.quit()
        except smtplib.SMTPServerDisconnected:
            # The connection is already gone; do not mask the real error.
            pass
# Send the emails only when run as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment