Skip to content

Instantly share code, notes, and snippets.

@andresAlvarado
Last active April 3, 2017 01:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save andresAlvarado/6c4b32e329dbf4b0aefa761789a1deb0 to your computer and use it in GitHub Desktop.
Save andresAlvarado/6c4b32e329dbf4b0aefa761789a1deb0 to your computer and use it in GitHub Desktop.
library( package = 'dplyr' )
library( package = 'data.table' )
library( package = 'reshape2' )
# Names for the columns that are kept when the event file is read, one per
# retained column: the game identifier, the eight fielding positions
# (catcher through right fielder; no pitcher column), the outs recorded on
# the event, and the team identifier.
l_e_names <- c(
  "GAME_ID",
  "CATCHER", "FIRST_BASEMAN", "SECOND_BASEMAN", "THIRD_BASEMAN",
  "SHORTSTOP", "LEFT_FIELDER", "CENTER_FIELDER", "RIGHT_FIELDER",
  "EVENT_OUTS_CT",
  "TEAM_ID"
)
# Column types, written as run lengths: each type in `x` is repeated the
# matching number of `times`, giving 158 entries in total. The entries that
# are not "NULL" fall at positions 1 (character), 19-26 (character),
# 41 (numeric) and 100 (character).
# NOTE(review): assumes the input file has exactly 158 columns and that
# "NULL" entries are skipped by the reader -- confirm against the file layout.
l_e_cols <- rep(
  x = c(
    "character", "NULL", "character", "NULL",
    "numeric", "NULL", "character", "NULL"
  ),
  times = c(1, 17, 8, 14, 1, 58, 1, 58)
)
# Load the 1970 season event file. The per-column types come from l_e_cols
# and the columns that are read are labelled with l_e_names.
d_e_1970 <- fread(
  input = "all1970.csv",
  sep = ",",
  col.names = l_e_names,
  colClasses = l_e_cols
)
# Reshape the season data from wide to long, so that each fielding-position
# column of an input row becomes its own output row.
# IDs: GAME_ID, EVENT_OUTS_CT and TEAM_ID (elements 1, 10 and 11 of l_e_names)
# Measures: every other name in l_e_names, i.e. all fielding positions
#           except for pitcher
# The position column name lands in POSITION, the cell value in PLAYER_ID.
d_r_1970 <- melt(
  data = d_e_1970,
  id.vars = l_e_names[c(1, 10, 11)],
  measure.vars = l_e_names[-c(1, 10, 11)],
  variable.name = "POSITION",
  value.name = "PLAYER_ID",
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  variable.factor = FALSE
)
# Total the outs per team, player and position, then express that total as
# games, innings and outs:
#   T_O   sum of EVENT_OUTS_CT for the group (NA values are ignored)
#   G     number of complete 27-out blocks (nine innings of three outs)
#   I     complete 3-out innings remaining after G
#   O     outs remaining after I
#   TEXT  "<G> game(s), <I> inning(s) and <O> out(s)"
# summarise() peels off only the last grouping level (POSITION), so the
# result is still grouped by TEAM_ID and PLAYER_ID; the mutate() below is
# purely row-wise arithmetic, so that grouping does not affect its values.
d_p_games <- d_r_1970 %>%
  group_by(TEAM_ID, PLAYER_ID, POSITION) %>%
  summarise(T_O = sum(EVENT_OUTS_CT, na.rm = TRUE)) %>%
  mutate(
    G = T_O %/% 27,
    I = (T_O %% 27) %/% 3,
    O = (T_O %% 27) %% 3,
    TEXT = paste(
      G, "game(s),",
      I, "inning(s) and",
      O, "out(s)",
      sep = " "
    )
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment