-
-
Save andresAlvarado/b63f861dac5b2e12bcd059fa08ec410f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library( package = 'dplyr' ) | |
library( package = 'data.table' ) | |
# Column-class vectors handed to fread( colClasses = ... ).
# Every position starts out as 'NULL', which tells fread to skip that column;
# only the positions we actually want are then switched to a real class.

# Event file: 158 columns, 4 of them kept.
l_e_cols <- rep( x = 'NULL', times = 158 )
l_e_cols[ c( 1, 15, 100 ) ] <- 'character'  # GAME_ID, PIT_ID, FLD_TEAM_ID
l_e_cols[ 41 ] <- 'numeric'                 # EVENT_OUTS_CT

# Game file: 179 columns, 2 of them kept.
l_g_cols <- rep( x = 'NULL', times = 179 )
l_g_cols[ c( 9, 86 ) ] <- 'character'       # HOME_TEAM_ID, HOME_TEAM_LEAGUE_ID

# Names given to the kept columns, in file order.
# The event file's fielding team and the game file's home team are both
# renamed to TEAM_ID so the two tables share a join key.
l_e_names <- c( 'GAME_ID'
              , 'PIT_ID'
              , 'EVENT_OUTS'
              , 'TEAM_ID'
              )
l_g_names <- c( 'TEAM_ID'
              , 'LEAGUE_ID'
              )
# Load the 1990 season event file into the environment.
# Only the columns flagged in l_e_cols are read; they are renamed to l_e_names.
d_e_1990 <- fread( input = 'all1990.csv'
                 , sep = ','
                 , header = TRUE  # TRUE rather than T: T is a plain variable that can be reassigned
                 , colClasses = l_e_cols
                 , col.names = l_e_names
                 )

# Load the 1990 season game file into the environment.
# Only the columns flagged in l_g_cols are read; they are renamed to l_g_names.
d_g_1990 <- fread( input = 'games1990.csv'
                 , sep = ','
                 , header = TRUE
                 , colClasses = l_g_cols
                 , col.names = l_g_names
                 )
# The game dataset ( d_g_1990 ) holds many duplicate records; reduce it to one
# row per distinct ( TEAM_ID, LEAGUE_ID ) pair so it can serve as a lookup table.
d_g_1990 <- d_g_1990 %>%
  distinct( TEAM_ID, LEAGUE_ID )

# Attach the league to every event row by matching on TEAM_ID
# ( the fielding team in the event data ). Being an inner join, event rows
# whose TEAM_ID has no match in the lookup are dropped.
d_1990 <- d_e_1990 %>%
  inner_join( y = d_g_1990, by = 'TEAM_ID' )
# Convert a count of outs into innings pitched ( IP ) written in box-score
# notation: completed innings before the decimal point, leftover outs
# ( 0, 1 or 2 ) as the tenths digit. So 3 outs -> 1.0, 4 outs -> 1.1, 5 outs -> 1.2.
#
# The remainder is taken with an integer modulo ( %% 3 ) and then scaled by 10.
# Taking `outs %% 0.3` instead relies on a floating-point divisor that has no
# exact binary representation, so the tenths digit comes out only approximately
# right ( e.g. 0.0999999999999996 ) and can vary between platforms.
outs_to_ip <- function( outs ) {
  outs %/% 3 + ( outs %% 3 ) / 10
}

# Total the outs recorded in `data` within the groups named in `...`
# ( bare column names ) and add the matching IP column.
#   data : table with an EVENT_OUTS column plus the grouping columns
#   ...  : grouping columns, forwarded unchanged to group_by()
# Returns one row per group with columns <groups>, O ( total outs ) and IP.
# Any grouping that summarise() leaves on its result is kept as-is.
summarise_ip <- function( data, ... ) {
  group_by( .data = data, ... ) %>%
    summarise( O = sum( x = EVENT_OUTS, na.rm = TRUE ) ) %>%
    mutate( IP = outs_to_ip( outs = O ) )
}

# Get IP by every pitcher for every game played in the 1990 season.
d_g_ip <- summarise_ip( data = d_1990, GAME_ID, PIT_ID )

# Get IP by every pitcher in the 1990 season.
d_s_ip <- summarise_ip( data = d_1990, PIT_ID )

# Get IP by every team in the 1990 season.
d_t_ip <- summarise_ip( data = d_1990, TEAM_ID, LEAGUE_ID )

# Get IP by the AL and NL leagues in the 1990 season.
d_l_ip <- summarise_ip( data = d_1990, LEAGUE_ID )

# Get the total of IP in the 1990 season.
# A constant YEAR column is added first so the whole table forms a single group.
d_mlb_ip <- mutate( .data = d_1990, YEAR = 1990 ) %>%
  summarise_ip( YEAR )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment