Created
October 11, 2019 04:55
-
-
Save BlueRayi/b3e1612ccb968b76da7251d90aa371c8 to your computer and use it in GitHub Desktop.
NPB の TrueSkill によるレーティング
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[47]: | |
import itertools | |
import math | |
from pathlib import Path | |
# In[48]: | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
import trueskill | |
from trueskill import rate_1vs1 | |
# In[49]: | |
def win_probability(team1, team2, env=None):
    """Return the probability that ``team1`` beats ``team2``.

    The gap between the summed ``mu`` values of both sides is divided by
    ``sqrt(n * beta**2 + sum(sigma**2))``, where ``n`` is the total number
    of ratings on both teams, and the quotient is passed through ``env.cdf``.

    Args:
        team1: Sized collection (e.g. a tuple) of rating objects exposing
            ``mu`` and ``sigma``.
        team2: Sized collection of rating objects for the opposing side.
        env: Environment providing ``beta`` and ``cdf``.  When ``None``,
            ``trueskill.global_env()`` is used.

    Returns:
        The value of ``env.cdf`` for the normalised skill gap.
    """
    # Compare against None explicitly so a caller-supplied environment is
    # never swapped for the global one merely because it evaluates falsy.
    if env is None:
        env = trueskill.global_env()
    delta_mu = sum(r.mu for r in team1) - sum(r.mu for r in team2)
    sum_sigma = sum(r.sigma ** 2 for r in itertools.chain(team1, team2))
    size = len(team1) + len(team2)
    denom = math.sqrt(size * (env.beta * env.beta) + sum_sigma)
    return env.cdf(delta_mu / denom)
# In[50]: | |
def expose_and_clip(rating, env=None, minimum=0., maximum=50.):
    """Return ``env.expose(rating)`` limited to ``[minimum, maximum]``.

    Args:
        rating: Rating object handed to ``env.expose``.
        env: Environment providing ``expose``.  When ``None``,
            ``trueskill.global_env()`` is used.
        minimum: Lower bound of the returned value.
        maximum: Upper bound of the returned value.

    Returns:
        The exposed rating, raised to ``minimum`` or lowered to ``maximum``
        when it falls outside that range.
    """
    # Compare against None explicitly so a caller-supplied environment is
    # never swapped for the global one merely because it evaluates falsy.
    if env is None:
        env = trueskill.global_env()
    return min(max(minimum, env.expose(rating)), maximum)
# In[51]: | |
def sma(src, interval=3):
    """Return the centred simple moving average of ``src``.

    The series is extended on both sides by repeating its first and last
    values, so the result has the same length as ``src``.

    Args:
        src: One-dimensional sequence of numbers.
        interval: Window width; must be a positive odd number.

    Returns:
        A float ``numpy.ndarray`` with ``len(src)`` elements (empty when
        ``src`` is empty).

    Raises:
        ValueError: If ``interval`` is not a positive odd number.
    """
    if not (interval > 0 and interval % 2 == 1):
        raise ValueError('Interval must be positive odd number.')
    values = np.asarray(src, dtype=float)
    if values.size == 0:
        # Nothing to average, and edge padding needs at least one value.
        return values
    margin_num = (interval - 1) // 2
    # np.pad also copes with margin_num == 0 (interval == 1), where slicing
    # with ``[margin_num:-margin_num]`` would select an empty range.
    marginal = np.pad(values, margin_num, mode='edge')
    kernel = np.ones(interval) / interval
    return np.convolve(marginal, kernel, mode='valid')
# In[52]: | |
def pad_by_nan(teams_transitions):
    """Pad every series in ``teams_transitions`` to a common length.

    The mapping is modified in place: each series shorter than the longest
    one is replaced by a ``numpy.ndarray`` holding its values followed by
    ``NaN`` entries.  Series that already have the maximum length are left
    untouched.  An empty mapping is accepted and left as is.

    Args:
        teams_transitions: Mapping from a key to a sequence of numbers.

    Returns:
        None.
    """
    # ``default`` keeps an empty mapping from raising in ``max``.
    max_game = max(map(len, teams_transitions.values()), default=0)
    # Only values of existing keys are reassigned, so the size of the
    # mapping never changes while it is being iterated.
    for team, teams_transition in teams_transitions.items():
        games_dif = max_game - len(teams_transition)
        if games_dif > 0:
            nans = np.full(games_dif, np.nan)
            teams_transitions[team] = np.concatenate((teams_transition, nans))
# In[53]: | |
# Season whose game file is loaded and whose year appears in output names.
year = 2019
# In[54]:
# Team keys shared by every per-team mapping in this script.
teams = [
    'Giants', 'Swallows', 'BayStars', 'Dragons', 'Tigers', 'Carp',
    'Fighters', 'Eagles', 'Lions', 'Marines', 'Buffaloes', 'Hawks',
]
# In[55]:
# Plot colours per team: (line colour, marker edge colour, marker face colour).
teams_colors = {
    'Giants': ('#FF7820', '#1E191A', '#F79822'),
    'Swallows': ('#073180', '#111C3C', '#FFFFFF'),
    'BayStars': ('#0052CD', '#024A92', '#E1F3FD'),
    'Dragons': ('#003595', '#042E83', '#FFFFFF'),
    'Tigers': ('#FFE100', '#000000', '#FFD600'),
    'Carp': ('#E50012', '#E60012', '#FFFFFF'),
    'Fighters': ('#00508F', '#000000', '#00508F'),
    'Eagles': ('#870010', '#86010F', '#F5A902'),
    'Lions': ('#00214B', '#102960', '#FFFFFF'),
    'Marines': ('#000000', '#000000', '#FFFFFF'),
    'Buffaloes': ('#202030', '#010119', '#BBAA31'),
    'Hawks': ('#FBC700', '#000000', '#FCC800'),
}
# In[56]:
# League flag per team; the chart draws True entries with a solid line and
# False entries with a dashed one.
is_central = {
    'Giants': True,
    'Swallows': True,
    'BayStars': True,
    'Dragons': True,
    'Tigers': True,
    'Carp': True,
    'Fighters': False,
    'Eagles': False,
    'Lions': False,
    'Marines': False,
    'Buffaloes': False,
    'Hawks': False,
}
# In[57]: | |
# Draw rate over the three seasons preceding ``year``: total draws divided
# by total games, read from the ``draw`` and ``game`` columns.
draw_csv = Path('./data/draw.csv')
draw_history = pd.read_csv(draw_csv).set_index('year')
# ``.loc`` slices by label, so both end years are included.
draw_data = draw_history.loc[year - 3:year - 1].sum()
draw_probability = draw_data['draw'] / draw_data['game']
# In[58]:
# Rating environment configured with the measured draw probability.
env = trueskill.TrueSkill(draw_probability=draw_probability)
# In[59]: | |
# Every team starts from the environment's default rating.
ratings = {team: env.create_rating() for team in teams}
# In[60]:
# In[61]:
# Per-team histories, each seeded with the value before any game is played:
# clipped exposed rating, mu and sigma.
ratix_transition = {
    team: [expose_and_clip(ratings[team], env=env)] for team in teams
}
mu_transition = {team: [ratings[team].mu] for team in teams}
sigma_transition = {team: [ratings[team].sigma] for team in teams}
# In[62]: | |
# Load the season's results; the loop reads the ``home``, ``visitor``,
# ``hscore`` and ``vscore`` columns.
games_dir = Path('./data/games/')
games_csv = games_dir / f'npb_data_{year}.csv'
games_df = pd.read_csv(games_csv)
# In[63]:
# Replay the games in file order, updating both teams' ratings after each
# one and recording the post-game values.
for _, row in games_df.iterrows():
    home = row['home']
    visitor = row['visitor']
    hscore = int(row['hscore'])
    vscore = int(row['vscore'])
    # Ranks are given as (home, visitor): the winner gets 0, the loser 1,
    # and both get 0 when the scores are level.
    if hscore == vscore:
        ranks = (0, 0)
    elif hscore > vscore:
        ranks = (0, 1)
    else:
        ranks = (1, 0)
    card = ((ratings[home],), (ratings[visitor],))
    (ratings[home],), (ratings[visitor],) = env.rate(card, ranks=ranks)
    for team in (home, visitor):
        ratix_transition[team].append(expose_and_clip(ratings[team], env=env))
        mu_transition[team].append(ratings[team].mu)
        sigma_transition[team].append(ratings[team].sigma)
# In[64]: | |
# Flip every history in place so that index 0 holds the latest value.
for transition in (ratix_transition, mu_transition, sigma_transition):
    for team in teams:
        transition[team].reverse()
# In[65]: | |
# Draw one smoothed rating curve per team and save the chart as SVG.
plt.figure(0)
plt.title('NPB Rating', fontsize=16)
for team in teams:
    line_color, marker_edge, marker_face = teams_colors[team]
    history = ratix_transition[team]
    # Entry i of the history is drawn at x = -i.
    x = -np.arange(len(history))
    # Smooth with a five-point moving average.
    y = sma(history, 5)
    plt.plot(x, y, label=team,
             marker='.', ms=10, markevery=10,
             c=line_color,
             mec=marker_edge,
             mfc=marker_face,
             ls='-' if is_central[team] else '--')
plt.xlabel('Recent Games', fontsize=14)
plt.ylabel('TrueSkill', fontsize=14)
# plt.ylim(17, 25)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
# The legend sits outside the axes; a tight bounding box keeps it from
# being cropped out of the saved file.
plt.savefig('./npb.svg', bbox_inches='tight')
# In[66]: | |
# Bring every team's history to the same length before tabulating.
for transition in (ratix_transition, mu_transition, sigma_transition):
    pad_by_nan(transition)
# In[67]:
result_dir = Path('./result/')
result_dir.mkdir(parents=True, exist_ok=True)
ratix_csv = result_dir / f'./ratix_{year}.csv'
mu_csv = result_dir / f'./mu_{year}.csv'
sigma_csv = result_dir / f'sigma_{year}.csv'
ratix_df = pd.DataFrame(ratix_transition)
mu_df = pd.DataFrame(mu_transition)
sigma_df = pd.DataFrame(sigma_transition)
# Negate the row index (0, -1, -2, ...) and write it out as the first
# CSV column of each table.
for frame, csv_path in (
        (ratix_df, ratix_csv),
        (mu_df, mu_csv),
        (sigma_df, sigma_csv)):
    frame.index = -frame.index
    frame.reset_index().to_csv(csv_path, index=False)
# Notebook cell output; has no effect when run as a plain script.
ratix_df
# In[68]: | |
# Pairwise win probabilities from the final ratings: the outer key is the
# losing team (column), the inner key is the winning team (row).
wp_columns = {
    lose_team: {
        win_team: win_probability(
            (ratings[win_team],), (ratings[lose_team],), env=env)
        for win_team in teams
    }
    for lose_team in teams
}
# In[69]:
wp_df = pd.DataFrame(wp_columns)
wp_df.index.name = 'win'
wp_df.columns.name = 'lose'
# Notebook cell output; has no effect when run as a plain script.
wp_df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment