Created
November 6, 2016 12:12
-
-
Save Shinichi-Nakagawa/8ff55af83390fcd2e2dd34bcb914868c to your computer and use it in GitHub Desktop.
PyCon mini Hiroshimaピタゴラス勝率サンプル
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Intended to run on Python 3 (3.4 or later); the same applies to any code that follows.
# $ pip install ipython pandas beautifulsoup4 numpy lxml html5lib
# Start ipython (or Jupyter, if you are used to it).
# $ ipython
# Compute the Central League Pythagorean winning percentage with pandas.
import pandas as pd
import numpy as np

# Number of games the Pythagorean percentage is projected onto when deriving
# a win/loss record below (wins + losses always add up to this value).
SEASON_GAMES = 143

# Load the standings page; read_html returns one DataFrame per HTML table found.
df = pd.read_html('http://baseball.yahoo.co.jp/npb/standings/')

# Take the first table as the Central League standings.
df_cl = df[0].drop([0])  # the first row is junk, so discard it

# Assign convenient English column names.
df_cl.columns = [
    'rank', 'name', 'games', 'win', 'lose', 'draw', 'pct', 'gb',
    're_games', 'r', 'er', 'hr', 'sb', 'ba', 'era',
]

# Give the columns used in the calculations a numeric dtype
# (missing values are treated as 0).
df_cl['win'] = df_cl['win'].fillna(0).astype(np.int64)    # wins
df_cl['lose'] = df_cl['lose'].fillna(0).astype(np.int64)  # losses
df_cl['pct'] = df_cl['pct'].fillna(0).astype(np.float64)  # winning percentage
df_cl['r'] = df_cl['r'].fillna(0).astype(np.int64)        # runs scored
df_cl['er'] = df_cl['er'].fillna(0).astype(np.int64)      # runs allowed

# Run differential.
df_cl['difference'] = df_cl['r'] - df_cl['er']

# Pythagorean winning percentage: r^2 / (r^2 + er^2).
df_cl['pythagorean_win_per'] = (df_cl['r'] ** 2) / (df_cl['r'] ** 2 + df_cl['er'] ** 2)

# Win/loss record implied by the Pythagorean percentage.
# A NaN percentage (0/0 when both run totals are 0) counts as 0 wins, and the
# integer cast truncates the fractional part of the projected win total.
df_cl['pythagorean_win'] = (df_cl['pythagorean_win_per'] * SEASON_GAMES).fillna(0).astype(np.int64)
df_cl['pythagorean_lose'] = SEASON_GAMES - df_cl['pythagorean_win']

# Rank the teams by Pythagorean winning percentage, best first.
# sort_values returns a new DataFrame (df_cl itself keeps its order); as the
# last expression of the session it is displayed by ipython/Jupyter.
df_cl.sort_values(by='pythagorean_win_per', ascending=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment