Skip to content

Instantly share code, notes, and snippets.

@lrvdijk
Created September 8, 2015 21:08
Show Gist options
  • Save lrvdijk/25c7e70d73b19af91b7c to your computer and use it in GitHub Desktop.
Save lrvdijk/25c7e70d73b19af91b7c to your computer and use it in GitHub Desktop.
Hockey Team Stats generator
#!/usr/bin/env python3
"""
Graph generator for wiebetaaltwat.nl
====================================
Written by Lucas van Dijk <info@return1.net>
"""
from bs4 import BeautifulSoup
from pickle import dump, load
import argparse
import sys
import re
participant_multiplier_re = re.compile(r'([0-9]+)x')
def create_database(args):
    """
    Read the given HTML files from wiebetaaltwat.nl and pickle their payments.

    Every path in ``args.files`` that ends with ``.html`` is parsed; all other
    paths are skipped. Each payment row found becomes a dict with the keys
    ``payment_by``, ``description``, ``amount`` and ``participants``. The list
    of all rows is pickled to ``args.output`` (an empty list is written when
    no HTML file was given).

    :param args: parsed command line arguments providing ``files`` (paths to
        read) and ``output`` (path of the pickle file to write).
    :raises IndexError: when a page has fewer description, amount or
        participant cells than payer cells.
    """
    data = []

    for path in args.files:
        if not path.endswith('.html'):
            continue

        # Parse inside a context manager so the file handle is closed right
        # away instead of leaking until garbage collection.
        with open(path) as html_file:
            soup = BeautifulSoup(html_file, "lxml")

        payments_by = soup.find_all('td', 'payment-by')
        descriptions = soup.find_all('td', 'description')
        amounts = soup.find_all('td', 'amount')
        participants = soup.find_all('td', 'participants')

        # The four cell lists are walked in lockstep, driven by the payer
        # column; indexing (rather than zip) keeps a misaligned page loud.
        for i, payer in enumerate(payments_by):
            # The first two characters of the amount cell are dropped
            # (presumably a currency symbol plus a space - verify against the
            # HTML) and the decimal comma is turned into a decimal point.
            amount_text = amounts[i].string.replace(',', '.')[2:]

            # Participants are comma separated; a multiplier such as "2x" is
            # removed so that only the name remains.
            names = [
                participant_multiplier_re.sub('', name.replace('\n', '')).strip()
                for name in participants[i].string.split(',')
            ]

            data.append({
                'payment_by': str(payer.string).strip(),
                'description': str(descriptions[i].string).strip(),
                'amount': float(amount_text),
                'participants': names,
            })

    with open(args.output, 'wb') as f:
        dump(data, f)
def create_graphs(args):
    """
    Render graphs from the pickled database and save each one as a PNG.

    The module named by ``args.module`` is imported and the requested graph
    functions are called with the unpickled data. Every function must return
    an object with a ``savefig`` method; the result is written to
    ``<function name>.png`` in the current working directory.

    :param args: parsed command line arguments providing ``module`` (name of
        the module holding the graph functions), ``file`` (path of the pickled
        database) and ``graphs`` (a function name, a list of function names,
        or an empty value to run every public ``*_graph`` function).
    """
    # Function-scope import keeps this block self-contained.
    import importlib

    # import_module returns the named module itself, also for dotted names
    # such as "pkg.graphs", where __import__ would return the top-level package.
    graphs = importlib.import_module(args.module)

    # NOTE: unpickling can execute arbitrary code; only load database files
    # that were created by the 'updatedb' subcommand on data you trust.
    with open(args.file, "rb") as f:
        data = load(f)

    funcs = args.graphs
    if isinstance(funcs, str):
        # A single name must not be iterated character by character.
        funcs = [funcs]
    if not funcs:
        funcs = [name for name in dir(graphs)
                 if name.endswith('_graph') and not name.startswith('_')]

    for funcname in funcs:
        func = getattr(graphs, funcname, None)
        if func is None:
            # Keep going with the remaining graphs, but tell the user why
            # no image was produced for this name.
            print("Unknown graph function: {0}".format(funcname), file=sys.stderr)
            continue

        fig = func(data)
        fig.savefig(funcname + '.png')
if __name__ == '__main__':
    # Command line entry point: 'updatedb' builds the pickled database from
    # saved HTML pages, 'graph' renders images from that database.
    argparser = argparse.ArgumentParser(description="Generate graphs from wiebetaaltwat.nl data")

    subparsers = argparser.add_subparsers(
        title='Subcommands', dest='command',
        description='This program can either create the graphs, or update the database')
    # Without a subcommand no ``func`` default is set, so insist on one and
    # let argparse report a usage error instead of failing on ``args.func``.
    subparsers.required = True

    parser_graphs = subparsers.add_parser('graph')
    parser_graphs.set_defaults(func=create_graphs)
    parser_graphs.add_argument('-m', '--module', default="graphs", help="Python module where graph generation functions are located")
    parser_graphs.add_argument('-f', '--file', default="data.pickle", help="The pickled database file")
    # '*' collects zero or more names into a list; '?' produced a single
    # string, which the consumer then iterated character by character.
    parser_graphs.add_argument('graphs', nargs='*', help="Graph generation functions to execute")

    parser_db = subparsers.add_parser('updatedb')
    parser_db.set_defaults(func=create_database)
    parser_db.add_argument("files", nargs='+', help='Files to read from, when creating the database')
    parser_db.add_argument('-o', '--output', default='data.pickle', help='Output file for the database')

    args = argparser.parse_args(sys.argv[1:])
    args.func(args)
"""
Graph generator for wiebetaaltwat.nl
====================================
Written by Lucas van Dijk <info@return1.net>
"""
from pylab import *
import random
import string
def is_game(entry):
    """
    Tell whether a payment entry belongs to a game.

    The entry's description is lower-cased and searched for each of the game
    keywords; one hit is enough.

    :param entry: dict with a ``description`` string.
    :return: ``True`` when a game keyword occurs in the description.
    """
    keywords = ('wedstrijd', 'zondag', 'barendrecht', 'kratje')
    lowered = entry['description'].lower()
    return any(keyword in lowered for keyword in keywords)
def is_training(entry):
    """
    Tell whether a payment entry belongs to a training.

    The entry's description is lower-cased and searched for each of the
    training keywords; one hit is enough.

    :param entry: dict with a ``description`` string.
    :return: ``True`` when a training keyword occurs in the description.
    """
    keywords = ('training', 'woensdag')
    lowered = entry['description'].lower()
    return any(keyword in lowered for keyword in keywords)
def get_totals(data):
    """
    Sum all amounts, overall and per category.

    Each entry lands in exactly one category, checked in this order:
    'wedstrijd' (``is_game``), 'training' (``is_training``), 'dixo' and
    'toernooi' (keyword in the lower-cased description), otherwise 'overig'.
    Entries that end up in 'overig' are printed so they can be inspected.

    :param data: iterable of entry dicts with ``amount`` and ``description``.
    :return: tuple ``(total, totals)`` where ``totals`` maps each category
        name to its summed amount.
    """
    total = 0.0
    totals = {
        'wedstrijd': 0.0,
        'training': 0.0,
        'dixo': 0.0,
        'toernooi': 0.0,
        'overig': 0.0,
    }

    for entry in data:
        amount = entry['amount']
        description = entry['description'].lower()
        total += amount

        if is_game(entry):
            category = 'wedstrijd'
        elif is_training(entry):
            category = 'training'
        elif 'dixo' in description:
            category = 'dixo'
        elif 'toernooi' in description:
            category = 'toernooi'
        else:
            category = 'overig'
            print(entry['description'], entry['amount'])

        # 'overig' is accumulated like every other category. Deriving it as
        # "total minus the rest" can leave a tiny negative float rounding
        # residue, which a pie chart refuses to draw.
        totals[category] += amount

    return (total, totals)
def total_graph(data):
    """
    Draw a pie chart of the total amount spent, split per category.

    The categories are 'wedstrijd', 'training', 'dixo', 'toernooi' and
    'overig', as computed by ``get_totals``. When the total is zero there is
    nothing to divide, so the pie is left out and only the title is drawn.

    :param data: iterable of entry dicts as stored in the database.
    :return: the figure holding the chart.
    """
    (total, totals) = get_totals(data)

    categories = ['wedstrijd', 'training', 'dixo', 'toernooi', 'overig']
    labels = ['Wedstrijd', 'Training', 'Dixo', 'Toernooi', 'Overig']

    fig = figure(figsize=(10, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    # Guard against dividing by zero for an empty (or all-zero) database.
    if total:
        fractions = [(totals[category] / total) * 100 for category in categories]
        ax.pie(fractions, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)

    ax.set_title('Totaalbedrag van {0:.2f} euro onderverdeeld'.format(total))

    return fig
def averages_game_training_graph(data):
    """
    Draw a bar chart of the average amount per person per activity.

    For every entry the amount is divided by its number of participants; the
    result is grouped under game, training or dixo (entries matching none of
    these are ignored) and the mean of each group is plotted. A group without
    entries is plotted as 0 instead of raising ZeroDivisionError.

    :param data: iterable of entry dicts with ``amount``, ``participants``
        and ``description``.
    :return: the figure holding the chart.
    """
    averages_training = []
    averages_game = []
    averages_dixo = []

    for entry in data:
        average = entry['amount'] / len(entry['participants'])

        if is_game(entry):
            averages_game.append(average)
        elif is_training(entry):
            averages_training.append(average)
        elif 'dixo' in entry['description'].lower():
            averages_dixo.append(average)

    def mean(values):
        # An activity that never occurred has no average; show it as zero.
        return sum(values) / len(values) if values else 0.0

    # Bar order matches the tick labels set below.
    averages = [mean(averages_training), mean(averages_game), mean(averages_dixo)]

    fig = figure(figsize=(10, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    colors = ['r', 'g', 'b']
    ax.bar(range(3), averages, align='center', color=colors)

    # Print each value just above its bar.
    for position, value in enumerate(averages):
        ax.annotate('{0:.2f}'.format(value), xy=(position, value + 0.1))

    ax.set_ylabel('Euro')
    ax.set_title('Gemiddeld aantal euro per persoon per activiteit')
    ax.set_xticks(range(3))
    ax.set_xticklabels(['Training', 'Wedstrijd', 'Dixo'])
    fig.autofmt_xdate()

    return fig
def most_payed_graph(data):
    """
    Draw a bar chart of each person's total share in all payments.

    Every entry's amount is split evenly over its participants and the shares
    are summed per person. Bars get random colours.

    :param data: iterable of entry dicts with ``amount`` and ``participants``.
    :return: the figure holding the chart.
    """
    total_payed = {}

    for entry in data:
        average_per_person = entry['amount'] / len(entry['participants'])

        for person in entry['participants']:
            total_payed[person] = total_payed.get(person, 0.0) + average_per_person

    # Materialise names and values from one pass over the dict so the bars,
    # annotations and tick labels are guaranteed to line up.
    persons = list(total_payed.keys())
    values = list(total_payed.values())

    fig = figure(figsize=(17, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    # One random 24-bit colour per bar; the full 0x000000-0xffffff range is
    # reachable (randrange(0, 255) per channel could never yield 0xff).
    colors = ['#{0:06x}'.format(random.randrange(0x1000000)) for _ in persons]
    ax.bar(range(len(values)), values, align='center', color=colors)

    # Print each value slightly above and to the left of its bar.
    for i, value in enumerate(values):
        ax.annotate('{0:.2f}'.format(value), xy=(i-0.5, value+3))

    ax.set_ylabel('Euro')
    ax.set_title('Wie heeft in totaal het meest betaald')
    ax.set_xticks(range(len(persons)))
    ax.set_xticklabels(persons)
    fig.autofmt_xdate()

    return fig
def most_present_graph(data):
    """
    Draw a bar chart of how often each person took part in a payment.

    A person is counted once for every occurrence in an entry's participant
    list. Bars get random colours.

    :param data: iterable of entry dicts with ``participants``.
    :return: the figure holding the chart.
    """
    total_present = {}

    for entry in data:
        for person in entry['participants']:
            total_present[person] = total_present.get(person, 0) + 1

    # Materialise names and counts from one pass over the dict so the bars,
    # annotations and tick labels are guaranteed to line up.
    persons = list(total_present.keys())
    counts = list(total_present.values())

    fig = figure(figsize=(17, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    # One random 24-bit colour per bar; the full 0x000000-0xffffff range is
    # reachable (randrange(0, 255) per channel could never yield 0xff).
    colors = ['#{0:06x}'.format(random.randrange(0x1000000)) for _ in persons]
    ax.bar(range(len(counts)), counts, align='center', color=colors)

    # Print each count slightly above and to the left of its bar.
    for i, value in enumerate(counts):
        ax.annotate('{0}'.format(value), xy=(i-0.3, value+3))

    ax.set_ylabel('Aantal keer')
    ax.set_title('Wie betaalt er het vaakst mee op wiebetaaltwat.nl')
    ax.set_xticks(range(len(persons)))
    ax.set_xticklabels(persons, size='small')
    fig.autofmt_xdate()

    return fig
def most_buyed_graph(data):
    """
    Draw a bar chart of the total amount each person paid up front.

    Amounts are summed per ``payment_by`` value, i.e. per person who made
    the payment. Bars get random colours.

    :param data: iterable of entry dicts with ``payment_by`` and ``amount``.
    :return: the figure holding the chart.
    """
    total_buyed = {}

    for entry in data:
        buyer = entry['payment_by']
        total_buyed[buyer] = total_buyed.get(buyer, 0) + entry['amount']

    # Materialise names and values from one pass over the dict so the bars,
    # annotations and tick labels are guaranteed to line up.
    buyers = list(total_buyed.keys())
    values = list(total_buyed.values())

    fig = figure(figsize=(17, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    # One random 24-bit colour per bar; the full 0x000000-0xffffff range is
    # reachable (randrange(0, 255) per channel could never yield 0xff).
    colors = ['#{0:06x}'.format(random.randrange(0x1000000)) for _ in buyers]
    ax.bar(range(len(values)), values, align='center', color=colors)

    # Print each value slightly above and to the left of its bar.
    for i, value in enumerate(values):
        ax.annotate('{0:.2f}'.format(value), xy=(i-0.5, value+5))

    ax.set_ylabel('Euro')
    ax.set_title('Wie heeft het meeste geld ingelegd')
    ax.set_xticks(range(len(buyers)))
    ax.set_xticklabels(buyers)
    fig.autofmt_xdate()

    return fig
# Ad-hoc report: list the amount and description of every payment in the
# pickled database that was made by one specific person.
import pickle

with open("data.pickle", "rb") as db_file:
    data = pickle.load(db_file)

for payment in data:
    # Skip everything that was paid by somebody else.
    if payment['payment_by'] != 'Lucas van Dijk':
        continue
    print(payment['amount'], payment['description'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment