Skip to content

Instantly share code, notes, and snippets.

@Shinichi-Nakagawa
Created May 3, 2015 16:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Shinichi-Nakagawa/9e853b41fa39172c8cd8 to your computer and use it in GitHub Desktop.
Save Shinichi-Nakagawa/9e853b41fa39172c8cd8 to your computer and use it in GitHub Desktop.
Pitcherの成績をデータフレーム化して返すクラス
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'Shinichi Nakagawa'
import pandas as pd
import numpy as np
from retrosheet_analytics import RetroSheetAnalytics
class StatsPitcher(object):
    """Build month-by-year tables of a pitcher's wins and losses.

    The game log is read from ``self.rs.games``, which is used here as a
    pandas DataFrame exposing a ``GAME_DT`` column (compared against integers
    of the form YYYYMMDD) plus the ``WIN_PIT_ID`` and ``LOSE_PIT_ID`` columns.
    """

    # Default aggregation window; both the year and month bounds are inclusive.
    FROM_YEAR = 2010
    TO_YEAR = 2014
    FROM_MONTH = 3
    TO_MONTH = 10

    def __init__(self):
        self.rs = RetroSheetAnalytics()

    def win_of_month(self, player_id, from_year=FROM_YEAR, to_year=TO_YEAR, from_month=FROM_MONTH, to_month=TO_MONTH):
        """Count the pitcher's wins per month.

        :param player_id: player ID (Retrosheet)
        :param from_year: first year (inclusive)
        :param to_year: last year (inclusive)
        :param from_month: first month (inclusive)
        :param to_month: last month (inclusive)
        :return: DataFrame indexed by month with one column per year
        """
        return self._stats_of_month(player_id, from_year, to_year, from_month, to_month, self.rs.games.WIN_PIT_ID)

    def lose_of_month(self, player_id, from_year=FROM_YEAR, to_year=TO_YEAR, from_month=FROM_MONTH, to_month=TO_MONTH):
        """Count the pitcher's losses per month.

        :param player_id: player ID (Retrosheet)
        :param from_year: first year (inclusive)
        :param to_year: last year (inclusive)
        :param from_month: first month (inclusive)
        :param to_month: last month (inclusive)
        :return: DataFrame indexed by month with one column per year
        """
        return self._stats_of_month(player_id, from_year, to_year, from_month, to_month, self.rs.games.LOSE_PIT_ID)

    def _stats_of_month(self, player_id, from_year, to_year, from_month, to_month, search_column):
        """Count, per month and year, the games whose ``search_column`` equals ``player_id``.

        :param player_id: player ID (Retrosheet)
        :param from_year: first year (inclusive)
        :param to_year: last year (inclusive)
        :param from_month: first month (inclusive)
        :param to_month: last month (inclusive)
        :param search_column: column of ``self.rs.games`` to match against
            ``player_id`` (the callers pass WIN_PIT_ID or LOSE_PIT_ID)
        :return: DataFrame of integer counts, indexed by month with one column
            per year; an empty month or year range yields an empty table
        """
        years = list(range(from_year, to_year + 1))
        months = list(range(from_month, to_month + 1))

        # The player match does not depend on the month/year cell, so apply it
        # once and keep only the dates of the games credited to this pitcher.
        pitcher_dates = self.rs.games.GAME_DT[search_column == player_id]

        # Pre-shaped matrix so an empty month or year range still produces a
        # well-formed (possibly zero-sized) table instead of a shape error.
        counts = np.zeros((len(months), len(years)), dtype=int)
        for row, mm in enumerate(months):
            for col, yy in enumerate(years):
                # Dates are integers (YYYYMMDD); bound the month by day 01..31.
                month_base = yy * 10000 + mm * 100
                in_month = (pitcher_dates >= month_base + 1) & (pitcher_dates <= month_base + 31)
                # Number of matching games = wins (or losses) for that cell.
                counts[row, col] = in_month.sum()
        return pd.DataFrame(counts, index=months, columns=years)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment