Compute streaks of consecutive batters retired from Retrosheet play-by-play files
import pandas | |
import glob | |
if __name__ == '__main__': | |
events = pandas.concat([ pandas.read_csv(fn) | |
for fn in glob.glob("event*.csv") ], | |
ignore_index=True) | |
events = events[events.BAT_EVENT_FL=="T"] | |
events["BAT_TEAM_ID"] = events["BAT_TEAM_ID"].replace("MON", "WAS") | |
events["BAT_TEAM_ID"] = events["BAT_TEAM_ID"].replace("CAL", "ANA") | |
events["BAT_TEAM_ID"] = events["BAT_TEAM_ID"].replace("FLO", "MIA") | |
games = pandas.concat([ pandas.read_csv(fn) | |
for fn in glob.glob("game*.csv") ], | |
ignore_index=True) | |
events = pandas.merge(events, games, how='left', on='GAME_ID') | |
events.sort([ "RESP_PIT_ID", "BAT_TEAM_ID", "GAME_DT", "GAME_CT" ], | |
inplace=True) | |
events["IS_OUT"] = ((events.EVENT_CD==2) | (events.EVENT_CD==3) | | |
(events.EVENT_CD==19)).astype(int) | |
events["NEW_STREAK"] = ((events.RESP_PIT_ID!=events.RESP_PIT_ID.shift()) | | |
(events.BAT_TEAM_ID!=events.BAT_TEAM_ID.shift()) | | |
(events.IS_OUT!=events.IS_OUT.shift())) | |
events["STREAK_NO"] = events.NEW_STREAK.cumsum() | |
events["STREAK_LEN"] = events.groupby([ "STREAK_NO" ]).cumcount()+1 | |
streaks = events[[ "RESP_PIT_ID", "BAT_TEAM_ID", "GAME_DT", "IS_OUT", "STREAK_NO", "STREAK_LEN" ]] | |
streaks = streaks.groupby([ "RESP_PIT_ID", "BAT_TEAM_ID", "STREAK_NO", "IS_OUT" ]).agg({ "GAME_DT": (min, max), "STREAK_LEN": max }).reset_index() | |
streaks.columns = [ "PITCHER", "OPPONENT", "STREAK_ID", "IS_OUT", | |
"STREAK_LEN", "DATE_START", "DATE_END" ] | |
streaks.sort("STREAK_LEN", ascending=False, inplace=True) | |
people = pandas.read_csv("people.csv") | |
streaks = pandas.merge(streaks, people, how='left', | |
left_on=[ 'PITCHER' ], | |
right_on= [ 'KEY_RETRO' ]) | |
streaks = streaks[[ "PITCHER", "NAME_LAST", "NAME_FIRST", | |
"OPPONENT", "STREAK_LEN", | |
"DATE_START", "DATE_END" ]] | |
streaks[streaks.STREAK_LEN>=27].to_csv("streaks.csv", index=False) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment