Created
May 3, 2015 16:45
-
-
Save Shinichi-Nakagawa/9e853b41fa39172c8cd8 to your computer and use it in GitHub Desktop.
Pitcherの成績をデータフレーム化して返すクラス
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
__author__ = 'Shinichi Nakagawa' | |
import pandas as pd | |
import numpy as np | |
from retrosheet_analytics import RetroSheetAnalytics | |
class StatsPitcher(object):
    """Build month-by-year pitcher statistics as pandas DataFrames.

    Game records are read from ``self.rs.games``, which ``__init__`` obtains
    from ``RetroSheetAnalytics``.
    """

    # Default aggregation window (years and months, both ends inclusive).
    FROM_YEAR = 2010
    TO_YEAR = 2014
    FROM_MONTH = 3
    TO_MONTH = 10

    def __init__(self):
        self.rs = RetroSheetAnalytics()

    def win_of_month(self, player_id, from_year=FROM_YEAR, to_year=TO_YEAR, from_month=FROM_MONTH, to_month=TO_MONTH):
        """
        Number of wins per month.

        :param player_id: player ID (Retrosheet)
        :param from_year: first year (inclusive)
        :param to_year: last year (inclusive)
        :param from_month: first month (inclusive)
        :param to_month: last month (inclusive)
        :return: DataFrame indexed by month with one column per year
        """
        return self._stats_of_month(player_id, from_year, to_year, from_month, to_month, self.rs.games.WIN_PIT_ID)

    def lose_of_month(self, player_id, from_year=FROM_YEAR, to_year=TO_YEAR, from_month=FROM_MONTH, to_month=TO_MONTH):
        """
        Number of losses per month.

        :param player_id: player ID (Retrosheet)
        :param from_year: first year (inclusive)
        :param to_year: last year (inclusive)
        :param from_month: first month (inclusive)
        :param to_month: last month (inclusive)
        :return: DataFrame indexed by month with one column per year
        """
        return self._stats_of_month(player_id, from_year, to_year, from_month, to_month, self.rs.games.LOSE_PIT_ID)

    def _stats_of_month(self, player_id, from_year, to_year, from_month, to_month, search_column):
        """
        Count, for every month/year pair, the games whose ``search_column``
        value equals ``player_id``.

        :param player_id: player ID (Retrosheet)
        :param from_year: first year (inclusive)
        :param to_year: last year (inclusive)
        :param from_month: first month (inclusive)
        :param to_month: last month (inclusive)
        :param search_column: column of ``self.rs.games`` compared against ``player_id``
        :return: DataFrame of counts indexed by month with one column per year;
            an empty month or year range yields a correspondingly empty frame
        """
        years = list(range(from_year, to_year + 1))
        months = list(range(from_month, to_month + 1))
        game_dates = self.rs.games.GAME_DT
        # Loop-invariant: rows where the searched column names this player
        # (callers in this class pass WIN_PIT_ID or LOSE_PIT_ID).
        is_player = search_column == player_id
        # Pre-sized 2-D array so an empty range still has a well-formed shape.
        counts = np.zeros((len(months), len(years)), dtype=int)
        for row, mm in enumerate(months):
            for col, yy in enumerate(years):
                # GAME_DT is compared as a YYYYMMDD integer; bounding the day
                # by 01..31 covers every possible day of the month.
                from_date = yy * 10000 + mm * 100 + 1
                to_date = yy * 10000 + mm * 100 + 31
                in_month = (game_dates >= from_date) & (game_dates <= to_date)
                # Number of matching rows = number of wins (or losses).
                counts[row, col] = (in_month & is_player).sum()
        return pd.DataFrame(counts, index=months, columns=years)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment