# phil-lopreiato/oprstats.py

## oprstats.py
#! /usr/bin/python

import argparse
import csv
import json
import urllib2

from collections import defaultdict

"""
A script to try and correlate district participation with team performance. Computes:
1) Average OPR of District teams and Regional teams qualifying for CMP, compared year to year
2) Average ranking of District and Regional teams attending CMP
3) Amount of teams attending CMP that played in elims at least once at a regional/district event

Methodology:
 - For the 2007 - 2016 FRC seasons, load each event and store:
  - a mapping of {team_key: district}
  - a mapping of {team_key: [oprs for REGIONAL/DISTRICT/DCMP]}
  - a mapping of {team_key: cmp opr}, if team attended CMP
  - a mapping of {team_key: [absolute ranking at REGIONAL/DISTRICT/DCMP]}
  - a mapping of {team_key: cmp rank}, if team attended
  - a mapping of {team_key: [ranking percent (rank/total #teams) at REGIONAL/DISTRICT/DCMP]}
  - a mapping of {team_key: [alliance number at REGIONAL/DISTRICT/DCMP]}
  - a mapping of {team_key: cmp alliance number}, if qualified
 - Generate tables (one CSV file each):
  - opr.csv: [year, team_key, district, average in-season OPR, max in-season OPR, cmp OPR]
  - rankings.csv: [year, team_key, district, average normalized in-season ranking, normalized cmp ranking]
  - alliances.csv: [year, team_key, district, fraction of in-season events in elims, in cmp alliance?]
"""

# URL template for every API request; the endpoint path replaces "{}".
BASE_URL = "https://www.thebluealliance.com/api/v2/{}"
# Header name / value pair sent with each request to identify this script.
APP_HEADER = "X-TBA-App-Id"
APP_ID = "plnyyanks:oprandcmp:v0.1"

# Event type codes that count as in-season play.
# See https://github.com/the-blue-alliance/the-blue-alliance/blob/master/consts/event_type.py
VALID_EVENT_TYPES = [
    0,  # REGIONAL
    1,  # DISTRICT
    2,  # DCMP
]

# District abbreviations whose team lists are fetched for every season.
# See https://github.com/the-blue-alliance/the-blue-alliance/blob/master/consts/district_type.py
VALID_DISTRICT_SHORTS = [
    "chs",
    "pch",
    "in",
    "fim",
    "mar",
    "nc",
    "ne",
    "pnw",
]


def fetch_endpoint(endpoint):
    """Fetch one API endpoint and return its decoded JSON body.

    Args:
        endpoint: Path substituted into BASE_URL, e.g. "events/2016".

    Returns:
        The response body as parsed by json.loads.

    Errors raised by urllib2.urlopen or json.loads are not caught here and
    propagate to the caller.
    """
    full_url = BASE_URL.format(endpoint)
    # Parenthesised form prints the same text under the Python 2 print statement.
    print("Fetching {}".format(full_url))
    request = urllib2.Request(full_url, headers={APP_HEADER: APP_ID, 'User-agent': 'Mozilla/5.0'})
    response = urllib2.urlopen(request)
    try:
        payload = response.read()
    finally:
        # The script issues many requests per run; close each response
        # explicitly instead of relying on garbage collection.
        response.close()
    return json.loads(payload)


def fetch_event_keys_in_year(year):
    """Split one season's event keys into in-season and championship groups.

    Args:
        year: Season used to build the "events/<year>" endpoint.

    Returns:
        A tuple (inseason_keys, cmp_keys): keys of events whose "event_type"
        is in VALID_EVENT_TYPES, and keys of events whose "event_type" is 3.
    """
    inseason_keys = []
    cmp_keys = []
    for event in fetch_endpoint("events/{}".format(year)):
        event_type = event["event_type"]
        if event_type in VALID_EVENT_TYPES:
            inseason_keys.append(event["key"])
        if event_type == 3:  # CMP_DIVISION
            cmp_keys.append(event["key"])
    return (inseason_keys, cmp_keys)


def fetch_event_info(event_key):
    """Return the decoded JSON served at "event/<event_key>"."""
    endpoint = "event/{}".format(event_key)
    return fetch_endpoint(endpoint)


def fetch_event_teams(event_key):
    """Return the "key" field of every entry served at "event/<event_key>/teams"."""
    team_records = fetch_endpoint("event/{}/teams".format(event_key))
    return [record["key"] for record in team_records]


def fetch_event_rankings(event_key):
    """Return the decoded JSON served at "event/<event_key>/rankings"."""
    endpoint = "event/{}/rankings".format(event_key)
    return fetch_endpoint(endpoint)


def fetch_event_stats(event_key):
    """Return the decoded JSON served at "event/<event_key>/stats"."""
    endpoint = "event/{}/stats".format(event_key)
    return fetch_endpoint(endpoint)


def fetch_district_team_keys(year, short):
    """Return the "key" field of every entry served at "district/<short>/<year>/teams"."""
    district_teams = fetch_endpoint("district/{}/{}/teams".format(short, year))
    return [record["key"] for record in district_teams]


def alliance_contains_team(alliances, team_key):
    """Report whether a team appears in the "picks" of any alliance.

    Args:
        alliances: Sequence of dicts, each with a "picks" collection; may be
            empty or None.
        team_key: Team key to look for.

    Returns:
        None when `alliances` is empty or None (no data to judge from),
        otherwise True or False.
    """
    if alliances:
        for entry in alliances:
            if team_key in entry["picks"]:
                return True
        return False
    return None


def mean(data):
    """Return the arithmetic mean of `data` as a float.

    Args:
        data: Sized collection of numbers.

    Returns:
        sum(data) divided by len(data), always using true division.

    Raises:
        ZeroDivisionError: if `data` is empty.
    """
    # float() forces true division; under Python 2 an all-int input would
    # otherwise be truncated (mean([1, 2]) == 1).
    return sum(data) / float(len(data))


# Returns the % of other teams
# that the target team seeded higher than
def normalized_ranking(rankings, target_team, team_count):
    """Score a team's seeding as the fraction of other teams it out-ranked.

    Args:
        rankings: Sequence of rows; column 1 of each row is compared with the
            team number. The row index is used as the rank, so row 0 is
            presumably a header row -- confirm against the API.
        target_team: Team key; the first three characters are stripped to get
            the team number (e.g. "frc254" -> "254").
        team_count: Number of teams at the event.

    Returns:
        (team_count - row_index) / (team_count - 1) as a float, or None when
        there is no ranking data (empty or single-row `rankings`), the team
        is not listed, or team_count <= 1 (no other teams to compare with).
    """
    if not rankings or len(rankings) == 1 or team_count <= 1:
        return None
    team_number = target_team[3:]
    for raw_rank, ranking in enumerate(rankings):
        # Compare as text so both "254" and 254 match.
        # NOTE(review): some seasons may deliver team numbers as ints -- confirm.
        if u"{}".format(ranking[1]) == team_number:
            return (team_count - raw_rank) / float(team_count - 1)
    # Team not ranked at this event: report "no data" rather than a bogus score.
    return None

if __name__ == "__main__":
    # Command line: an inclusive range of seasons to analyse.
    parser = argparse.ArgumentParser()
    parser.add_argument("--start", help="First competition season to test", type=int, default=2016)
    parser.add_argument("--end", help="Last competition season to test, inclusive", type=int, default=2016)
    args = parser.parse_args()

    # Every accumulator is keyed by year, then by team key.
    team_districts = defaultdict(dict)                          # district short name
    inseason_oprs = defaultdict(lambda: defaultdict(list))      # one OPR (or None) per event
    cmp_oprs = defaultdict(dict)                                # OPR (or None) at CMP
    inseason_rankings = defaultdict(lambda: defaultdict(list))  # one normalized rank (or None) per event
    cmp_rankings = defaultdict(dict)                            # normalized rank (or None) at CMP
    inseason_alliances = defaultdict(lambda: defaultdict(list)) # one True/False/None per event
    cmp_alliances = defaultdict(dict)                           # True/False/None at CMP

    # For each year...
    for year in range(args.start, args.end + 1):
        # Get all district teams from that year
        for district_key in VALID_DISTRICT_SHORTS:
            for team_key in fetch_district_team_keys(year, district_key):
                team_districts[year][team_key] = district_key

        # Get event keys relevant for this year
        inseason_keys, cmp_keys = fetch_event_keys_in_year(year)

        for event_key in inseason_keys:
            teams = fetch_event_teams(event_key)
            # .get(): a response without these keys is recorded as "no data"
            # (None) instead of aborting the whole run with a KeyError.
            alliances = fetch_event_info(event_key).get("alliances")
            rankings = fetch_event_rankings(event_key)
            oprs = fetch_event_stats(event_key).get("oprs") or {}

            for team in teams:
                team_num = team[3:]  # OPRs are keyed by team number without the 3-char prefix
                inseason_alliances[year][team].append(alliance_contains_team(alliances, team))
                inseason_oprs[year][team].append(oprs.get(team_num))
                inseason_rankings[year][team].append(normalized_ranking(rankings, team, len(teams)))

        for event_key in cmp_keys:
            teams = fetch_event_teams(event_key)
            alliances = fetch_event_info(event_key).get("alliances")
            rankings = fetch_event_rankings(event_key)
            oprs = fetch_event_stats(event_key).get("oprs") or {}

            for team in teams:
                team_num = team[3:]
                cmp_alliances[year][team] = alliance_contains_team(alliances, team)
                cmp_oprs[year][team] = oprs.get(team_num)
                cmp_rankings[year][team] = normalized_ranking(rankings, team, len(teams))

    # alliances.csv: year, team, district, fraction of in-season events played
    # in an alliance, in an alliance at CMP (False when the team was not there).
    # 'wb' is the mode the Python 2 csv module expects for output files.
    with open('alliances.csv', 'wb') as csvfile:
        alliance_writer = csv.writer(csvfile)
        for year, team_maps in inseason_alliances.iteritems():
            for team, results in team_maps.iteritems():
                district = team_districts[year].get(team, None)
                # None marks events without alliance data; leave them out of the denominator.
                clean_results = [result for result in results if result is not None]
                percent_in_alliances = clean_results.count(True) / float(len(clean_results)) if clean_results else 0
                alliance_writer.writerow([year, team, district, percent_in_alliances, cmp_alliances[year].get(team, False)])

    # rankings.csv: year, team, district, average normalized in-season rank,
    # normalized CMP rank (empty when the team was not at CMP).
    with open('rankings.csv', 'wb') as csvfile:
        rankings_writer = csv.writer(csvfile)
        for year, team_maps in inseason_rankings.iteritems():
            for team, results in team_maps.iteritems():
                district = team_districts[year].get(team, None)
                clean_results = [result for result in results if result is not None]
                avg_norm_rank = sum(clean_results) / float(len(clean_results)) if clean_results else 0
                rankings_writer.writerow([year, team, district, avg_norm_rank, cmp_rankings[year].get(team)])

    # opr.csv: year, team, district, average in-season OPR, best in-season OPR,
    # CMP OPR (empty when the team was not at CMP).
    with open('opr.csv', 'wb') as csvfile:
        opr_writer = csv.writer(csvfile)
        for year, team_maps in inseason_oprs.iteritems():
            for team, results in team_maps.iteritems():
                district = team_districts[year].get(team, None)
                # Only drop missing values; an OPR of exactly 0.0 is real data
                # and must stay in the average.
                clean_results = [result for result in results if result is not None]
                avg_opr = sum(clean_results) / float(len(clean_results)) if clean_results else 0
                max_opr = max(clean_results) if clean_results else 0
                opr_writer.writerow([year, team, district, avg_opr, max_opr, cmp_oprs[year].get(team)])
	#! /usr/bin/python

	import argparse
	import csv
	import json
	import urllib2

	from collections import defaultdict

	"""
	A script to try and correlate district participation with team performance. Computes:
	1) Average OPR of District teams and Regional teams qualifying for CMP, compared year to year
	2) Average ranking of District and Regional teams attending CMP
	3) Amount of teams attending CMP that played in elims at least once at a regional/district event

	Methodology:
	- For the 2007 - 2016 FRC seasons, load each event and store:
	- a mapping of {team_key: district}
	- a mapping of {team_key: [oprs for REGIONAL/DISTRICT/DCMP]}
	- a mapping of {team_key: cmp opr}, if team attended CMP
	- a mapping of {team_key: [absolute ranking at REGIONAL/DISTRICT/DCMP]}
	- a mapping of {team_key: cmp rank}, if team attended
	- a mapping of {team_key: [ranking percent (rank/total #teams) at REGIONAL/DISTRICT/DCMP]}
	- a mapping of {team_key: [alliance number at REGIONAL/DISTRICT/DCMP]}
	- a mapping of {team_key: cmp alliance number}, if qualified
	- Generate tables (one CSV file each):
	- opr.csv: [year, team_key, district, average in-season OPR, max in-season OPR, cmp OPR]
	- rankings.csv: [year, team_key, district, average normalized in-season ranking, normalized cmp ranking]
	- alliances.csv: [year, team_key, district, fraction of in-season events in elims, in cmp alliance?]
	"""

	# URL template for every API request; the endpoint path replaces "{}".
	BASE_URL = "https://www.thebluealliance.com/api/v2/{}"
	# Header name / value pair sent with each request to identify this script.
	APP_HEADER = "X-TBA-App-Id"
	APP_ID = "plnyyanks:oprandcmp:v0.1"

	# Event type codes that count as in-season play: [REGIONAL, DISTRICT, DCMP].
	# See https://github.com/the-blue-alliance/the-blue-alliance/blob/master/consts/event_type.py
	VALID_EVENT_TYPES = [0, 1, 2]

	# District abbreviations whose team lists are fetched for every season.
	# See https://github.com/the-blue-alliance/the-blue-alliance/blob/master/consts/district_type.py
	VALID_DISTRICT_SHORTS = ["chs", "pch", "in", "fim", "mar", "nc", "ne", "pnw"]


	def fetch_endpoint(endpoint):
		"""Fetch one API endpoint and return its decoded JSON body.

		Args:
			endpoint: Path substituted into BASE_URL, e.g. "events/2016".

		Returns:
			The response body as parsed by json.loads.

		Errors raised by urllib2.urlopen or json.loads are not caught here and
		propagate to the caller.
		"""
		full_url = BASE_URL.format(endpoint)
		# Parenthesised form prints the same text under the Python 2 print statement.
		print("Fetching {}".format(full_url))
		request = urllib2.Request(full_url, headers={APP_HEADER: APP_ID, 'User-agent': 'Mozilla/5.0'})
		response = urllib2.urlopen(request)
		try:
			payload = response.read()
		finally:
			# Close each response explicitly instead of relying on garbage collection.
			response.close()
		return json.loads(payload)


	def fetch_event_keys_in_year(year):
		"""Split one season's event keys into in-season and championship groups.

		Args:
			year: Season used to build the "events/<year>" endpoint.

		Returns:
			A tuple (inseason_keys, cmp_keys): keys of events whose "event_type"
			is in VALID_EVENT_TYPES, and keys of events whose "event_type" is 3.
		"""
		api_events = fetch_endpoint("events/{}".format(year))
		inseason_keys = [event["key"] for event in api_events if event["event_type"] in VALID_EVENT_TYPES]
		cmp_keys = [event["key"] for event in api_events if event["event_type"] == 3]  # CMP_DIVISION
		return (inseason_keys, cmp_keys)


	def fetch_event_info(event_key):
		"""Return the decoded JSON served at "event/<event_key>"."""
		return fetch_endpoint("event/{}".format(event_key))


	def fetch_event_teams(event_key):
		"""Return the "key" field of every entry served at "event/<event_key>/teams"."""
		return [team["key"] for team in fetch_endpoint("event/{}/teams".format(event_key))]


	def fetch_event_rankings(event_key):
		"""Return the decoded JSON served at "event/<event_key>/rankings"."""
		return fetch_endpoint("event/{}/rankings".format(event_key))


	def fetch_event_stats(event_key):
		"""Return the decoded JSON served at "event/<event_key>/stats"."""
		return fetch_endpoint("event/{}/stats".format(event_key))


	def fetch_district_team_keys(year, short):
		"""Return the "key" field of every entry served at "district/<short>/<year>/teams"."""
		return [team["key"] for team in fetch_endpoint("district/{}/{}/teams".format(short, year))]


	def alliance_contains_team(alliances, team_key):
		"""Report whether a team appears in the "picks" of any alliance.

		Args:
			alliances: Sequence of dicts, each with a "picks" collection; may be
				empty or None.
			team_key: Team key to look for.

		Returns:
			None when `alliances` is empty or None (no data to judge from),
			otherwise True or False.
		"""
		if not alliances:
			return None
		return any(team_key in alliance["picks"] for alliance in alliances)


	def mean(data):
		"""Return the arithmetic mean of `data` as a float.

		Args:
			data: Sized collection of numbers.

		Returns:
			sum(data) divided by len(data), always using true division.

		Raises:
			ZeroDivisionError: if `data` is empty.
		"""
		# float() forces true division; under Python 2 an all-int input would
		# otherwise be truncated (mean([1, 2]) == 1).
		return sum(data) / float(len(data))


	# Returns the % of other teams
	# that the target team seeded higher than
	def normalized_ranking(rankings, target_team, team_count):
		"""Score a team's seeding as the fraction of other teams it out-ranked.

		Args:
			rankings: Sequence of rows; column 1 of each row is compared with the
				team number. The row index is used as the rank, so row 0 is
				presumably a header row -- confirm against the API.
			target_team: Team key; the first three characters are stripped to get
				the team number (e.g. "frc254" -> "254").
			team_count: Number of teams at the event.

		Returns:
			(team_count - row_index) / (team_count - 1) as a float, or None when
			there is no ranking data (empty or single-row `rankings`), the team
			is not listed, or team_count <= 1 (no other teams to compare with).
		"""
		if not rankings or len(rankings) == 1 or team_count <= 1:
			return None
		team_number = target_team[3:]
		for raw_rank, ranking in enumerate(rankings):
			# Compare as text so both "254" and 254 match.
			# NOTE(review): some seasons may deliver team numbers as ints -- confirm.
			if u"{}".format(ranking[1]) == team_number:
				return (team_count - raw_rank) / float(team_count - 1)
		# Team not ranked at this event: report "no data" rather than a bogus score.
		return None

	if __name__ == "__main__":
		# Command line: an inclusive range of seasons to analyse.
		parser = argparse.ArgumentParser()
		parser.add_argument("--start", help="First competition season to test", type=int, default=2016)
		parser.add_argument("--end", help="Last competition season to test, inclusive", type=int, default=2016)
		args = parser.parse_args()

		# Every accumulator is keyed by year, then by team key.
		team_districts = defaultdict(dict)  # district short name
		inseason_oprs = defaultdict(lambda: defaultdict(list))  # one OPR (or None) per event
		cmp_oprs = defaultdict(dict)  # OPR (or None) at CMP
		inseason_rankings = defaultdict(lambda: defaultdict(list))  # one normalized rank (or None) per event
		cmp_rankings = defaultdict(dict)  # normalized rank (or None) at CMP
		inseason_alliances = defaultdict(lambda: defaultdict(list))  # one True/False/None per event
		cmp_alliances = defaultdict(dict)  # True/False/None at CMP

		# For each year...
		for year in range(args.start, args.end + 1):
			# Get all district teams from that year
			for district_key in VALID_DISTRICT_SHORTS:
				for team_key in fetch_district_team_keys(year, district_key):
					team_districts[year][team_key] = district_key

			# Get event keys relevant for this year
			inseason_keys, cmp_keys = fetch_event_keys_in_year(year)

			for event_key in inseason_keys:
				teams = fetch_event_teams(event_key)
				# .get(): a response without these keys is recorded as "no data"
				# (None) instead of aborting the whole run with a KeyError.
				alliances = fetch_event_info(event_key).get("alliances")
				rankings = fetch_event_rankings(event_key)
				oprs = fetch_event_stats(event_key).get("oprs") or {}

				for team in teams:
					team_num = team[3:]  # OPRs are keyed by team number without the 3-char prefix
					inseason_alliances[year][team].append(alliance_contains_team(alliances, team))
					inseason_oprs[year][team].append(oprs.get(team_num))
					inseason_rankings[year][team].append(normalized_ranking(rankings, team, len(teams)))

			for event_key in cmp_keys:
				teams = fetch_event_teams(event_key)
				alliances = fetch_event_info(event_key).get("alliances")
				rankings = fetch_event_rankings(event_key)
				oprs = fetch_event_stats(event_key).get("oprs") or {}

				for team in teams:
					team_num = team[3:]
					cmp_alliances[year][team] = alliance_contains_team(alliances, team)
					cmp_oprs[year][team] = oprs.get(team_num)
					cmp_rankings[year][team] = normalized_ranking(rankings, team, len(teams))

		# alliances.csv: year, team, district, fraction of in-season events played
		# in an alliance, in an alliance at CMP (False when the team was not there).
		# 'wb' is the mode the Python 2 csv module expects for output files.
		with open('alliances.csv', 'wb') as csvfile:
			alliance_writer = csv.writer(csvfile)
			for year, team_maps in inseason_alliances.iteritems():
				for team, results in team_maps.iteritems():
					district = team_districts[year].get(team, None)
					# None marks events without alliance data; leave them out of the denominator.
					clean_results = [result for result in results if result is not None]
					percent_in_alliances = clean_results.count(True) / float(len(clean_results)) if clean_results else 0
					alliance_writer.writerow([year, team, district, percent_in_alliances, cmp_alliances[year].get(team, False)])

		# rankings.csv: year, team, district, average normalized in-season rank,
		# normalized CMP rank (empty when the team was not at CMP).
		with open('rankings.csv', 'wb') as csvfile:
			rankings_writer = csv.writer(csvfile)
			for year, team_maps in inseason_rankings.iteritems():
				for team, results in team_maps.iteritems():
					district = team_districts[year].get(team, None)
					clean_results = [result for result in results if result is not None]
					avg_norm_rank = sum(clean_results) / float(len(clean_results)) if clean_results else 0
					rankings_writer.writerow([year, team, district, avg_norm_rank, cmp_rankings[year].get(team)])

		# opr.csv: year, team, district, average in-season OPR, best in-season OPR,
		# CMP OPR (empty when the team was not at CMP).
		with open('opr.csv', 'wb') as csvfile:
			opr_writer = csv.writer(csvfile)
			for year, team_maps in inseason_oprs.iteritems():
				for team, results in team_maps.iteritems():
					district = team_districts[year].get(team, None)
					# Only drop missing values; an OPR of exactly 0.0 is real data
					# and must stay in the average.
					clean_results = [result for result in results if result is not None]
					avg_opr = sum(clean_results) / float(len(clean_results)) if clean_results else 0
					max_opr = max(clean_results) if clean_results else 0
					opr_writer.writerow([year, team, district, avg_opr, max_opr, cmp_oprs[year].get(team)])