Skip to content

Instantly share code, notes, and snippets.

@bdilday
Created August 24, 2020 11:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bdilday/36d9ce508d1aa01d27f6044c317c60ed to your computer and use it in GitHub Desktop.
Save bdilday/36d9ce508d1aa01d27f6044c317c60ed to your computer and use it in GitHub Desktop.
graphical mlb standings in R
library(dplyr)
library(ggplot2)
library(ggrepel)
library(rvest)
library(stringr)
fg_current_standings <- function() {
url = "https://www.fangraphs.com/depthcharts.aspx?position=Standings"
h = xml2::read_html(url)
tables = html_table(h)[9:14]
div_order = paste(rep(c("AL", "NL"), each=3), c("E", "C", "W"), sep='-')
current_standings = lapply(1:6, function(i) {
tmp = tables[[i]]
names(tmp) = tmp[2,]
tmp = tmp[3:nrow(tmp),1:8] # only current standings + team name, not projections
tmp$div_id = div_order[[i]]
tmp
}) %>% dplyr::bind_rows()
nms = names(current_standings)
nms = str_replace(nms, "%", "pct")
nms = str_replace(nms, "/", "_")
names(current_standings) = nms
for (nm in c("G", "W", "L", "RDif")) {
current_standings[[nm]] = as.integer(current_standings[[nm]])
}
for (nm in c("Wpct", "RS_G", "RA_G")) {
current_standings[[nm]] = as.numeric(current_standings[[nm]])
}
current_standings
}
del_run = 2.75
current_standings = fg_current_standings()
rs1 = 0.5 * (mean(current_standings$RS_G) + mean(current_standings$RA_G))
run_max = rs1 + del_run
run_min = rs1 - del_run
text_x_min = run_min + 2 * del_run * 0.12
text_x_max = run_max - 2 * del_run * 0.12
# 36 to 126 per 162 games in steps of 10
wseq = seq(81-45, 81+45, 10) / 162
slopes = sqrt((1-wseq)/wseq)
dfC = data.frame(s=slopes)
dfC$x = run_min
dfC$y = run_min * slopes
dfC$xend = run_max
dfC$yend = run_max * slopes
cc = which(dfC$y < run_min)
dfC[cc,]$y = run_min
dfC[cc,]$x = run_min / slopes[cc]
cc = which(dfC$yend > run_max)
dfC[cc,]$yend = run_max
dfC[cc,]$xend = run_max / slopes[cc]
wlabs = data.frame(w=wseq*162)
wlabs$x = text_x_min
wlabs$y = text_x_min * slopes
cc = which(wlabs$y < run_min)
if (length(cc) > 0) {
wlabs[cc,]$x = text_x_max
wlabs[cc,]$y = text_x_max * slopes[cc]
}
p = current_standings %>%
ggplot(aes(x=RS_G, y=RA_G)) + geom_point() +
geom_text_repel(aes(label=Team)) +
theme_minimal(base_size = 16) +
xlim(run_min-0.01, run_max+0.01) + ylim(run_max+0.01, run_min-0.01) +
labs(x="RS / G", y="RA / G") +
geom_segment(data=dfC, aes(x=x, y=y, xend=xend, yend=yend), alpha=0.5) +
theme(panel.grid = element_blank()) +
geom_vline(xintercept=rs1) + geom_hline(yintercept=rs1) +
facet_wrap(~div_id) + geom_text(data=wlabs, aes(x=x, y=y-0.1, label=w), alpha=0.5)
# compute the end points for the curve
current_standings = current_standings %>%
mutate(wpythag = RS_G**2/(RS_G**2 + RA_G**2),
x0 = RS_G * sqrt(Wpct/wpythag),
y0 = x0 * sqrt((1-Wpct)/Wpct))
# this function generates 100 points along a curve given by the parameters
parametric_curve = function(x0, y0, xend, yend) {
dx_seq = seq(0, 1, 0.01)
lapply(dx_seq, function(dx) {
tx = x0 + (xend-x0)*dx
ty = sqrt(x0**2 + y0**2 - tx**2)
list(x=tx, y=ty)
}) %>% bind_rows()
}
# generate the curves for each team
arcs = lapply(1:nrow(current_standings), function(idx) {
row = current_standings[idx,]
arc = parametric_curve(row$RS_G, row$RA_G, row$x0, row$y0)
arc$Team = row$Team
arc$div_id = row$div_id
arc$lucky = as.integer(row$x0 > row$RS_G)
arc
}) %>% bind_rows()
p = current_standings %>%
ggplot(aes(x=RS_G, y=RA_G)) + geom_point() +
geom_text_repel(aes(x=x0, y=y0, label=Team, color=as.factor(Wpct > wpythag))) +
theme_minimal(base_size = 16) +
xlim(run_min-0.01, run_max+0.01) + ylim(run_max+0.01, run_min-0.01) +
labs(x="RS / G", y="RA / G",
title=sprintf("Graphical MLB standings: %s", Sys.Date())) +
geom_segment(data=dfC, aes(x=x, y=y, xend=xend, yend=yend), alpha=0.5) +
theme(panel.grid = element_blank()) +
geom_vline(xintercept=rs1) + geom_hline(yintercept=rs1) +
facet_wrap(~div_id) + geom_text(data=wlabs,
aes(x=x, y=y-0.1, label=w),
alpha=0.5) +
geom_path(data=arcs, aes(x=x, y=y, group=Team)) +
scale_color_manual(values=c("steelblue", "red"), guide="none")
dot_plot = function() {
plot_df = current_standings
plot_df$div_id = factor(plot_df,
levels = c("AL-E", "AL-C","AL-W","NL-E", "NL-C","NL-W"))
plot_df = plot_df %>%
mutate(lg = ifelse(grepl('AL', div_id), 'AL', 'NL')) %>%
group_by(div_id) %>%
arrange(Wpct) %>%
mutate(div_rank=row_number()) %>%
ungroup() %>%
group_by(lg) %>%
arrange(-Wpct) %>%
mutate(lg_rank=row_number()) %>% ungroup()
p2 = plot_df %>% filter(div_rank == 5)
p1a = plot_df %>% filter(div_rank < 5, lg=='AL') %>% top_n(2, -lg_rank)
p1n = plot_df %>% filter(div_rank < 5, lg=='NL') %>% top_n(2, -lg_rank)
plot_df$playoff_bound = 0
cc2 = which(plot_df$Team %in% p2$Team)
cc1 = which(plot_df$Team %in% c(p1a$Team, p1n$Team))
plot_df[cc2,]$playoff_bound = 2
plot_df[cc1,]$playoff_bound = 1
plot_df$playoff_bound = factor(plot_df$playoff_bound, levels = c(2,1,0))
ptit = sprintf("MLB Standings - %s", Sys.Date())
p_by_div = plot_df %>%
ggplot() +
geom_dumbbell(aes(x=Wpct, xend=wpythag, y=div_rank),
size_x = 3, size_xend = 1.5,
colour_x = "black", colour_xend = "steelblue") +
geom_text(aes(x=0.2, y=div_rank, label=Team, color=playoff_bound),
hjust=0, size=3.5, nudge_y = 0.2,fontface="bold") +
theme_minimal(base_size = 16) +
theme(axis.text.y = element_blank()) +
labs(x="Win Pct.", y="", title=ptit) + xlim(0.2, 0.8) +
scale_x_continuous(sec.axis = sec_axis(~.*162, name = 'Wins per 162 G')) +
theme(panel.grid.minor.y = element_blank()) +
scale_color_manual(values = c("firebrick3", "royalblue3", "gray24")) +
guides(color=FALSE) +
geom_vline(xintercept = 0.5, linetype=2, color='gray55') +
facet_wrap(~div_id)
p_by_lg = plot_df %>%
mutate(lg_rank = -lg_rank) %>%
# mutate(Team = paste(str_sub(tolower(plot_df$div_id), 4), plot_df$Team)) %>%
ggplot() +
geom_dumbbell(aes(x=Wpct, xend=wpythag, y=lg_rank),
size_x = 3, size_xend = 1.5,
colour_x = "black", colour_xend = "steelblue") +
geom_text(aes(x=0.2, y=lg_rank, label=Team, color=playoff_bound),
hjust=0, size=3.5, nudge_y = 0.2,fontface="bold") +
theme_minimal(base_size = 16) +
theme(axis.text.y = element_blank()) +
labs(x="Win Pct.", y="", title=ptit) + xlim(0.2, 0.8) +
scale_x_continuous(sec.axis = sec_axis(~.*162, name = 'Wins per 162 G')) +
scale_color_manual(values = c("firebrick3", "royalblue3", "gray24")) +
guides(color=FALSE) +
geom_vline(xintercept = 0.5, linetype=2, color='gray55') +
facet_wrap(~lg) +
scale_y_continuous(minor_breaks = seq(-15, -1, 1))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment