Skip to content

Instantly share code, notes, and snippets.

@pontsuyu
Last active April 3, 2020 16:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pontsuyu/53e934b81e8dfee55858d4201e1c2708 to your computer and use it in GitHub Desktop.
Save pontsuyu/53e934b81e8dfee55858d4201e1c2708 to your computer and use it in GitHub Desktop.
statcastデータにおけるlaunch_speed, launch_angleの欠測値について(前編)
# devtools::install_github("pontsuyu/statcastr")
# install.packages("patchwork")
library(statcastr)
library(patchwork)
# statcastデータの取得(30分くらいかかるかも)
dat17 <- scrape_statcast("2017-04-02", "2017-10-01")
dat18 <- scrape_statcast("2018-03-29", "2018-09-30")
dat19 <- scrape_statcast("2019-03-28", "2019-09-29")
dat <- rbind(dat17, dat18, dat19)
saveRDS(dat, "dat17to19.rds") # 保存
# dat <- readRDS("dat17to19.rds")
glimpse(dat)
# 球場情報の取得
gd <- dat$game_date %>% unique
game_pks <- purrr::map_dfr(gd, get_game_pk_info)
saveRDS(game_pks, "game_pks.rds")
# game_pks <- readRDS("game_pks.rds")
# データを投球情報,打球位置がわかる打球に絞り、球場情報を追加
datx <- dat %>%
filter(type == "X", !is.na(hc_x), !is.na(zone)) %>%
mutate(
hc_x = as.numeric(hc_x),
hc_y = as.numeric(hc_y)
) %>%
left_join(game_pks %>%
select(game_pk, park=venue.name) %>%
distinct())
# 可視化用関数
graph <- function(datx){
datx_graph <- datx %>%
mutate(
launch_speed = round(launch_speed),
launch_angle = round(launch_angle)
) %>%
count(launch_speed, launch_angle)
ue <- ggplot(datx, aes(launch_speed)) +
geom_histogram(binwidth = 1) +
xlab(NULL)
migi <- ggplot(datx, aes(launch_angle)) +
geom_histogram(binwidth = 1) +
coord_flip() +
xlab(NULL)
g <- ggplot(datx_graph, aes(launch_speed, launch_angle, fill=n)) +
geom_tile() +
scale_fill_gradient2(low = "white", mid = "pink", high = "red", midpoint = 7500) +
theme(legend.position = "bottom")
ue + plot_spacer() + g + migi + plot_layout(2, 2, widths = c(6,4), heights = c(3,7))
}
p <- graph(datx)
p
ggsave("plot.png", p, width=8, height=6)
# 頻度の多い組み合わせ
candidate <- datx %>%
count(launch_speed, launch_angle) %>%
ungroup() %>%
arrange(desc(n)) %>%
mutate(cumsum = cumsum(n),
cumsumper = round(cumsum/NROW(.)*100,2)) %>%
head(30)
candidate %>%
as.data.frame()
# TOP15とNA
combi <- candidate %>%
slice(1:15,29)
missingdata <- datx %>%
left_join(combi %>%
mutate(flg=1, flg2=row_number())) %>%
mutate(launch_speed = ifelse(is.na(flg), launch_speed, NA),
launch_angle = ifelse(is.na(flg), launch_angle, NA))
# チェック
table(missingdata$flg);table(missingdata$flg2)
p2 <- graph(missingdata)
p2
ggsave("plot2.png", p2, width=8, height=6)
# データの保存
saveRDS(missingdata, "missingdata.rds")
# 共変量探索をやっていき
# devtools::install_github("pontsuyu/pitchRx2")
# install.packages(c("tidyverse", "rpart", "partykit"))
library(tidyverse)
library(rpart)
library(partykit)
missingdata <- readRDS("missingdata.rds") %>%
mutate(hc_x = hc_x - 125.42,
hc_y = 198.27 - hc_y, # 原点をホームベースにする
flg = ifelse(is.na(flg), 0, flg),
flg2 = ifelse(is.na(flg2), 0, flg2))
# 補助変数探索
func <- function(var){
missingdata %>%
group_by_(.dots = var) %>%
summarise(na = sum(flg),
n = n(),
per = round(na/n*100,2)) %>%
arrange(desc(per))
}
## 1. 打球結果
func("bb_type")
## 犠牲バントとゴロ系の打球が多い
## 2. 打球結果
func("events")
## 3. 球場
func("park")
## 4. 打球位置
temp <- func(list(hc_x = "round(hc_x)", hc_y = "round(hc_y)"))
p <- ggplot(temp, aes(hc_x, hc_y, fill=per)) +
geom_tile()
p
ggsave("batted_ball_location.png", p, width=8, height=6)
## 投手前とファールフライは欠損率高い
## 5. 球種
func("pitch_name")
## 関係なさそう
## 6. 球速
func(list(release_speed = "round(release_speed)")) %>%
filter(n>=1000)
## 関係なさそう
## 7. 試合日
func("game_year")
func("game_date")
## 関係なさそう
## 8. 利き腕
func("p_throws")
## 関係なさそう
## 9.投手
func("pitcher") %>%
filter(n>=1000)
## 関係ありそう
## 10. 打者左右
func("stand")
## 関係なさそう
## 11.打者
func("batter") %>%
filter(n>=1000)
## 関係ありそう
## 12.ゾーン
func(list("pitch_name","zone"))
func("zone")
## 関係ありそう
# 決定木
missingdata <- missingdata %>%
mutate(r = sqrt(hc_x^2 + hc_y^2), # 極座標変換
theta = atan(hc_y / hc_x))
as <- function(x) as.character(as.numeric(as.factor(x)))
rp <- rpart(as.factor(flg) ~ as(park) + as(events) +
bb_type + zone + r + abs(theta), missingdata,
method="class", control = rpart.control(cp = 0.005))
plotcp(rp)
rp2 <- prune(rp, cp = 0.0064)
rp2
plot(as.party(rp2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment