Skip to content

Instantly share code, notes, and snippets.

@ptgolden
Created April 25, 2012 01:55
Show Gist options
  • Save ptgolden/2485460 to your computer and use it in GitHub Desktop.
Save ptgolden/2485460 to your computer and use it in GitHub Desktop.
Parse play-by-play data from basketballvalue.com for use in Gephi
#!/usr/bin/python
import argparse
import csv
import re
# Command-line interface: one positional play-by-play file and an optional
# --team abbreviation.  Arguments are parsed immediately, at module level.
parser = argparse.ArgumentParser(
    description='Extract plays involving assists from raw play-by-play data',
)
parser.add_argument('input_file', metavar='FILE')
parser.add_argument(
    '--team',
    help=('Three letter team name to search '
          'for, e.g. SAC, MEM, NYK'),
)
args = parser.parse_args()
def _build_assist_pattern(team):
    """Compile the verbose regex that recognises an assisted basket.

    ``team`` is the abbreviation expected directly after the opening bracket
    of a play entry.  It is passed through ``re.escape`` so it is always
    matched literally: regex metacharacters, whitespace or ``#`` in the value
    would otherwise change the meaning of the pattern under ``re.X``.

    When ``team`` is ``None`` (``--team`` was not given) any three capital
    letters are accepted, instead of searching for the literal text "None"
    and silently matching nothing.

    The compiled pattern exposes the named groups ``t0_score``, ``t1_score``,
    ``scorer``, ``score_method`` and ``assister``.
    """
    team_token = r'[A-Z]{3}' if team is None else re.escape(team)
    return re.compile(r'''
        \[%s\ # Scoring team
        (?P<t0_score>\d+)-(?P<t1_score>.*?)\]\ # Team scores
        (?P<scorer>(?:[A-Z]\.\ )?\w+)\ # Player who scored
        (?P<score_method>.*?)[:].*? # Method of scoring
        Assist:\ (?P<assister>.*?)\ \( # Assister
        ''' % team_token, re.X)


assist_pattern = _build_assist_pattern(args.team)
# Maps each player name to a 1-based integer id, assigned in order of first
# appearance.
players = {}


def get_player_idx(p):
    """Return the integer id for player ``p``, registering it on first use.

    Ids start at 1 and grow by one for every previously unseen name; the
    mapping is kept in the module-level ``players`` dict.
    """
    if p not in players:
        players[p] = len(players) + 1
    return players[p]
def _open_csv(path, mode):
    """Open ``path`` the way the ``csv`` module expects on this interpreter.

    ``mode`` is ``'r'`` or ``'w'``.  Python 3's csv module needs text-mode
    files opened with ``newline=''``; Python 2's needs binary-mode files.
    """
    import sys  # local import so this block does not rely on file-level imports
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


# Edge list: one tab-separated row per assisted basket.  The scorer's id is
# written in the SOURCE column and the assister's id in the TARGET column.
with _open_csv(args.input_file, 'r') as f, \
        _open_csv('assists.csv', 'w') as outfile:
    data = csv.reader(f, delimiter='\t')
    out = csv.writer(outfile, delimiter='\t')
    headers = ['SOURCE', 'TARGET']
    out.writerow(headers)
    for row in data:
        # csv.reader yields [] for blank lines (e.g. a trailing newline);
        # those cannot be unpacked into the four expected columns.
        if not row:
            continue
        game, line, time_remaining, play = row
        assisted = assist_pattern.search(play)
        if not assisted:
            continue
        p1 = get_player_idx(assisted.group('scorer'))
        p2 = get_player_idx(assisted.group('assister'))
        out.writerow([p1, p2])

# Node table: id -> player name, written in ascending id order so the output
# does not depend on dict iteration order.
with _open_csv('players.csv', 'w') as players_outfile:
    players_out = csv.writer(players_outfile, delimiter='\t')
    players_out.writerow(['ID', 'LABEL'])
    for player, pid in sorted(players.items(), key=lambda item: item[1]):
        players_out.writerow([pid, player])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment