Created
July 14, 2015 05:00
-
-
Save cpsievert/0adc410ef37e521c4f47 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Obtain Exit Velocity and Batted Ball Distance Data
#'
#' Downloads the "color" feed of a single game from the MLB stats API and
#' parses the exit velocity (the number quoted before "mph") and the distance
#' (the number quoted before "feet") out of the description of every item
#' whose id is "playResult".
#'
#' @param id a single "game_pk" id.
#' @return A data frame with one row per play result and the columns
#'   `exit`, `distance` (both numeric, `NA` when the description quotes no
#'   such number), `game_pk` (the supplied `id`) and `num` (the position of
#'   the play result within the feed, starting at 1; this should hopefully
#'   match the at-bat number). A feed without play results gives a data frame
#'   with the same columns and zero rows.
#' @examples
#' \dontrun{
#' # Obtain data for a particular day
#' obs <- XML2R::XML2Obs("http://gd2.mlb.com/components/game/mlb/year_2015/month_07/day_08/miniscoreboard.xml")
#' gms <- obs[grepl("^games//game$", names(obs))]
#' ids <- XML2R::collapse_obs(gms)[, c("game_pk", "gameday_link")]
#' ids <- data.frame(ids, stringsAsFactors = FALSE)
#' ids$gameday_link <- paste0("gid_", ids$gameday_link)
#' bbs <- plyr::ldply(ids[, "game_pk"], grab_bb)
#' bbs <- plyr::join(bbs, ids)
#'
#' # Now, add the exit velocity and distance to PITCHf/x data obtained by pitchRx
#' dat <- pitchRx::scrape(game.ids = ids$gameday_link)
#' dat$atbat <- plyr::join(dat$atbat, bbs, by = c("num", "gameday_link"))
#' }
#'
grab_bb <- function(id) {
  # A vector of ids would make sprintf() build several URLs at once; fail
  # early with a clear message instead of an obscure download/parse error.
  if (length(id) != 1L || is.na(id)) {
    stop("`id` must be a single, non-missing game_pk value", call. = FALSE)
  }
  feed <- sprintf("http://statsapi.mlb.com/api/v1/game/%s/feed/color", id)
  payload <- jsonlite::fromJSON(feed, simplifyVector = FALSE)

  # Absent (or non-scalar) fields become "" so that the description and id
  # vectors always have exactly one entry per feed item and stay aligned.
  text_or_blank <- function(value) {
    if (is.character(value) && length(value) == 1L && !is.na(value)) {
      value
    } else {
      ""
    }
  }
  # `[[` is used throughout because `$` partially matches list names.
  items <- payload[["items"]]
  des <- vapply(
    items,
    function(x) text_or_blank(x[["data"]][["description"]]),
    character(1),
    USE.NAMES = FALSE
  )
  item_id <- vapply(
    items,
    function(x) text_or_blank(x[["id"]]),
    character(1),
    USE.NAMES = FALSE
  )
  # keep just the descriptions of play results
  # (this should hopefully match the number of at-bats)
  des <- des[item_id == "playResult"]

  # The same pattern decides whether a description has a value and extracts
  # it, so a description can never reach as.numeric() unparsed.
  parse_number <- function(x, pattern) {
    out <- rep(NA_real_, length(x))
    hit <- grepl(pattern, x)
    out[hit] <- as.numeric(sub(pattern, "\\1", x[hit]))
    out
  }
  df <- data.frame(
    exit = parse_number(des, ".* ([0-9]{2,3}) mph.*"),
    distance = parse_number(des, ".* ([0-9]{1,3}) feet.*")
  )

  # rep()/seq_len() also work for zero rows, so every result has the same
  # columns and can be row-bound or joined without special cases.
  n <- nrow(df)
  df$game_pk <- rep(id, n)
  df$num <- seq_len(n)
  df
}
Second question ...
I'm following along at https://baseballwithr.wordpress.com/2015/07/15/obtaining-exit-velocity-and-distance-of-batted-balls/. Thank you for these examples. I'm stuck on one part though. When I run:
bbs <- plyr::ldply(ids[, "game_pk"], grab_bb)
I get this error:
Error: lexical error: invalid char in json text.
http://statsapi.mlb.com/api/v1/
(right here) ------^
This is with R 3.3.0 on OS X. I understand the intent of the error message but am unclear on what exactly the violation is and how to fix it. Do you have any advice on how to address this? Thank you!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Could this be modified to get launch and/or spray angle as well?