library( package = 'dplyr' )
library( package = 'data.table' )
library( package = 'reshape2' )
# Column names given to the columns that are kept from the input file, in
# file order: the game id, the eight fielding positions other than pitcher,
# the EVENT_OUTS_CT measure and the (fielding) team id.
l_e_names <- c(
  'GAME_ID',
  'CATCHER', 'FIRST_BASEMAN', 'SECOND_BASEMAN', 'THIRD_BASEMAN',
  'SHORTSTOP', 'LEFT_FIELDER', 'CENTER_FIELDER', 'RIGHT_FIELDER',
  'EVENT_OUTS_CT',
  'TEAM_ID'
)

# Column types, one entry per column of the input file (158 in total).
# A 'NULL' class drops the column on read, so only columns 1, 19-26, 41 and
# 100 are kept.
# NOTE(review): the positions presumably follow the layout of the tool that
# produced the CSV -- confirm against the actual file.
l_e_cols <- c(
  'character',                      # column 1       -> GAME_ID
  rep(x = 'NULL', times = 17),      # columns 2-18   -> dropped
  rep(x = 'character', times = 8),  # columns 19-26  -> fielding positions
  rep(x = 'NULL', times = 14),      # columns 27-40  -> dropped
  'numeric',                        # column 41      -> EVENT_OUTS_CT
  rep(x = 'NULL', times = 58),      # columns 42-99  -> dropped
  'character',                      # column 100     -> TEAM_ID
  rep(x = 'NULL', times = 58)       # columns 101-158 -> dropped
)

# Each kept column needs exactly one name; fail early if the two vectors
# drift apart instead of mislabelling columns when the file is read.
stopifnot(sum(l_e_cols != 'NULL') == length(l_e_names))
# Push the 1970 season into the environment.
# Reads 'all1970.csv' (path is relative to the working directory) as a
# comma-separated file. Columns whose class in l_e_cols is 'NULL' are dropped;
# the remaining columns are renamed to l_e_names.
d_e_1970 <- fread(
  input = 'all1970.csv',
  sep = ',',
  colClasses = l_e_cols,
  col.names = l_e_names
)
# Transform the dataset from wide to long: one row per input row and
# fielding position.
#   IDs:      GAME_ID, EVENT_OUTS_CT and TEAM_ID (the fielding team)
#   Measures: all fielding positions, except for pitcher
# The former column name goes to POSITION (kept as character, not factor) and
# the cell value goes to PLAYER_ID.
# melt() is namespace-qualified because both data.table and reshape2 are
# attached and each exports a `melt`; `variable.factor` is a data.table
# argument, so the call should not depend on the attach order.
d_r_1970 <- data.table::melt(
  data = d_e_1970,
  id.vars = l_e_names[c(1, 10, 11)],
  measure.vars = l_e_names[-c(1, 10, 11)],
  variable.name = 'POSITION',
  value.name = 'PLAYER_ID',
  variable.factor = FALSE
)
# Get Games, Innings and Outs per team, player & position.
#   T_O  : total of EVENT_OUTS_CT for the group (missing values ignored)
#   G    : whole games, counting 27 outs per game
#   I    : whole innings left over after the games, counting 3 outs per inning
#   O    : outs left over after the games and innings
#   TEXT : readable label built from G, I and O
d_p_games <- group_by(.data = d_r_1970, TEAM_ID, PLAYER_ID, POSITION) %>%
  summarise(T_O = sum(EVENT_OUTS_CT, na.rm = TRUE)) %>%
  mutate(
    G = T_O %/% 27,
    I = (T_O %% 27) %/% 3,
    O = (T_O %% 27) %% 3,
    TEXT = paste(G, 'game(s),', I, 'inning(s) and', O, 'out(s)', sep = ' ')
  ) %>%
  # summarise() only peels off the last grouping level; drop the remaining
  # grouping so later steps on d_p_games are not silently run per group.
  ungroup()
# (Web page footer captured with the script, not part of the code:
#  "Sign up for free to join this conversation on GitHub.
#   Already have an account? Sign in to comment")