Skip to content

Instantly share code, notes, and snippets.

@luser
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luser/8f1b6c52a2e0d9869e72 to your computer and use it in GitHub Desktop.
Save luser/8f1b6c52a2e0d9869e72 to your computer and use it in GitHub Desktop.
Running group data mangler
Hello Mozilla runners!
Mozlandia is nearly upon us. Thanks for signing up for a running group! You've been placed in Group %(num)d, which is a distance of %(distance)s and an average pace of %(pace)s. This is all generated from a Python script, so consider it a helpful suggestion more than anything!
Your fellow runners in this group are all included in the To: line of this email. I'm hoping that you will be able to self-organize runs based on this, but if you need assistance feel free to stop into irc.mozilla.org #running or email me directly. You can also use this etherpad I've created: https://etherpad.mozilla.org/portland-running-groups if you want to make your outings available to anyone.
Regards,
-Ted
#!/usr/bin/env python
import csv
import re
import sys
import urllib
from collections import defaultdict
from cluster import HierarchicalClustering, KMeansClustering
from itertools import count
from operator import attrgetter, itemgetter
# Kilometres in one mile; the factor used to convert between per-mile and per-km figures.
KM_PER_MILE = 1.60934
# This is to work around KMeansClustering being silly for tuples that aren't vectors.
class person:
    """A runner (email + pace) that also poses as a one-element vector.

    Instances are handed straight to the clustering code, so the class
    reports a length of one and answers every index lookup with the pace.
    """

    def __init__(self, email, pace):
        self.email, self.pace = email, pace

    def __len__(self):
        # Advertise exactly one dimension.
        return 1

    def __getitem__(self, x):
        # The index is deliberately ignored: pace is the only coordinate.
        return self.pace
def person_distance(x, y):
    """Return the absolute difference between the first items of *x* and *y*."""
    first_x, first_y = x[0], y[0]
    return abs(first_x - first_y)
def normalize_time(t, units):
    """Convert a pace string into decimal minutes per mile.

    t: pace written as ``MM`` or ``MM:SS``. Only the leading match is read,
        so trailing text after the digits is ignored.
    units: ``'mi'`` when the pace is per mile, ``'km'`` when per kilometre.

    Returns the pace as a float number of minutes per mile.
    Raises RuntimeError for an unknown unit or a pace that cannot be parsed.
    """
    if units not in ('mi', 'km'):
        raise RuntimeError('Unknown unit: %s' % units)
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    m = re.match(r'(\d+)(:(\d+))?', t)
    if m is None:
        raise RuntimeError("Can't parse time: %s" % t)
    mins = int(m.group(1))
    # The seconds part is optional; treat a bare minute count as MM:00.
    secs = int(m.group(3)) if m.group(3) is not None else 0
    # A per-km pace takes KM_PER_MILE times as long to cover a full mile.
    conversion = 1 if units == 'mi' else KM_PER_MILE
    return conversion * (mins + (secs / 60.0))
def fmt_pace(pace):
    """Format a pace given in decimal minutes as a zero-padded ``MM:SS`` string.

    The pace is rounded to the nearest whole second before it is split into
    minutes and seconds. Truncating the fractional seconds instead lets
    floating-point error turn e.g. 7:20 into "07:19".
    """
    total_secs = int(round(pace * 60))
    mins, secs = divmod(total_secs, 60)
    return "%02d:%02d" % (mins, secs)
def nice_distance(distance):
    """Render a distance in miles together with its rounded km equivalent."""
    kilometres = int(round(distance * KM_PER_MILE))
    return "%d mi (%d km)" % (distance, kilometres)
def nice_pace(pace):
    """Render a per-mile pace in both min/mi and min/km form."""
    per_mile = fmt_pace(pace)
    # Dividing by KM_PER_MILE turns minutes per mile into minutes per km.
    per_km = fmt_pace(pace / KM_PER_MILE)
    return "%s min/mi (%s min/km)" % (per_mile, per_km)
def make_groups(source):
    """Read runner sign-ups from a CSV and yield pace-based groups per distance.

    source: URL (any string starting with ``http``) or local path of the CSV.
        The first row is skipped as a header; columns 1, 3, 4 and 5 are read
        as distance, email, pace and pace units respectively.

    Yields ``(distance, groups)`` pairs in ascending distance order.
    ``groups`` is a list of ``(number, mean_pace, members)`` tuples ordered by
    mean pace; ``members`` is sorted by pace. Group numbers start at 1 and keep
    counting across distances.

    Raises RuntimeError when a row lacks a pace or units value, or when
    normalize_time rejects them.
    """
    # Bin by distance, then group by pace per bin.
    bins = defaultdict(list)
    # Use the argument that was passed in rather than re-reading sys.argv.
    if source.startswith('http'):
        f = urllib.urlopen(source)
    else:
        f = open(source, 'rb')
    try:
        for i, row in enumerate(csv.reader(f)):
            if i == 0:
                continue
            distance, who, pace, units = row[1], row[3], row[4], row[5]
            # Both fields are required; flag the row if either one is empty.
            if not (pace and units):
                raise RuntimeError('Missing sanitized pace/units in row %d' % i)
            # Only the leading number of the distance cell is used.
            distance = int(distance.split()[0])
            normpace = normalize_time(pace, units)
            bins[distance].append(person(who, normpace))
    finally:
        # Close the handle even if a row fails to parse.
        f.close()
    c = count(1)
    for distance in sorted(bins):
        data = bins[distance]
        # Try to get about 9 people per cluster, but never ask for fewer
        # than two clusters (10-13 people would otherwise round down to 1).
        # NOTE(review): getclusters is believed to reject a count below 2 --
        # confirm against the python-cluster documentation.
        wanted = max(2, int(round(len(data) / 9.0)))
        if len(data) <= wanted:
            # Too few runners to split meaningfully: one group per runner.
            clusters = [[p] for p in data]
        else:
            cl = KMeansClustering(data, person_distance)
            clusters = cl.getclusters(wanted)
        # Pre-calculate mean so we can sort; skip empty clusters so the mean
        # never divides by zero.
        ranked = sorted(
            ((sum(p.pace for p in g) / len(g), g) for g in clusters if g),
            key=itemgetter(0))
        # Build the list eagerly so group numbers are assigned here, in
        # order, regardless of when the caller consumes the groups.
        yield distance, [
            (next(c), mean, sorted(members, key=attrgetter('pace')))
            for mean, members in ranked]
def main():
    """Print each running group and its members for the CSV named in argv[1]."""
    for distance, groups in make_groups(sys.argv[1]):
        for num, mean, members in groups:
            heading = "Group %d: %s, %s avg pace" % (
                num, nice_distance(distance), nice_pace(mean))
            # A single parenthesised argument prints the same text whether
            # print is a statement or a function.
            print(heading)
            for runner in members:
                print("\t%s (%s)" % (runner.email, nice_pace(runner.pace)))
            # Blank line between groups.
            print("")


if __name__ == '__main__':
    main()
#!/usr/bin/env python
import json
import os
import smtplib
import sys
from email.mime.text import MIMEText
from runninggroups import make_groups, nice_distance, nice_pace
def main():
    """Email every running group its number, distance and average pace.

    Reads ``email_template`` and ``config.json`` from this script's directory,
    logs in to the configured SMTP-over-SSL server, and sends one message per
    group (as produced by make_groups for argv[1]) addressed to all members
    of that group.
    """
    here = os.path.dirname(__file__)
    # Context managers so the template and config handles are always closed.
    with open(os.path.join(here, 'email_template'), 'r') as template_file:
        email_template = template_file.read()
    with open(os.path.join(here, 'config.json'), 'r') as config_file:
        config = json.load(config_file)
    s = smtplib.SMTP_SSL(config['server'], config['port'])
    try:
        s.login(config['username'], config['password'])
        for distance, groups in make_groups(sys.argv[1]):
            for num, mean, members in groups:
                data = {
                    'num': num,
                    'distance': nice_distance(distance),
                    'pace': nice_pace(mean)
                }
                msg = MIMEText(email_template % data)
                msg['Subject'] = 'Your Mozlandia running group!'
                msg['From'] = config['from']
                to = [m.email for m in members]
                msg['To'] = ','.join(to)
                print("Sending group %d email to %d people" % (num, len(to)))
                s.sendmail(config['from_mail'], to, msg.as_string())
        # Say goodbye to the server only when everything was sent.
        s.quit()
    finally:
        # Always release the socket, including when login or sending raised.
        s.close()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment