Skip to content

Instantly share code, notes, and snippets.

@gallochris
Created March 21, 2024 14:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gallochris/514531c82fe9da62419da6407710fc83 to your computer and use it in GitHub Desktop.
Save gallochris/514531c82fe9da62419da6407710fc83 to your computer and use it in GitHub Desktop.
Minutes + continuity of NCAA Tourney Field
# Pull the 2024 kenpom table used below for experience and continuity,
# via hoopR (a single season: min and max year are both 2024).
meta_metrics <- hoopR::kp_height(
  min_year = 2024,
  max_year = 2024
)
# Read the tournament seed list from the hosted CSV and keep three columns,
# renamed to lower case: seed, team, region.
seeds <-
  "https://gist.githubusercontent.com/gallochris/86fc2a21dd4b30e3dd9ea79516594f05/raw/1e2dfa7eae2d196f5dee9ecf5fe95e5f76ba827d/ncaat_seeds.csv" |>
  readr::read_csv() |>
  dplyr::select(seed = Seed, team = Team, region = Region)

# Team names still in the field: drop the teams that have already lost.
ncaat_teams <- seeds |>
  dplyr::filter(
    !team %in% c("Virginia", "Howard", "Boise St.", "Montana St.")
  ) |>
  dplyr::pull(team)
# Team-name lookup from cbbdata, used to match kenpom names to torvik names.
# `kp_team` is renamed to `team` so the lookup shares a join key with the
# kenpom metrics table.
teams <- cbbdata::cbd_teams() |>
  dplyr::rename(team = kp_team)
# Attach the team-name lookup to the kenpom metrics (matched on `team`), then
# keep only the rows whose `torvik_team` name is in the tourney field.
meta_list <- meta_metrics |>
  dplyr::left_join(teams, by = "team") |>
  dplyr::filter(torvik_team %in% ncaat_teams)
# Experience lists for the table: the first ten rows by ascending
# experience_rk and the first ten by descending experience_rk. Each list gets
# a 1..n `row_num` so the two can be paired row by row later.
exp_best <- meta_list |>
  dplyr::arrange(experience_rk) |>
  dplyr::slice_head(n = 10) |>
  dplyr::select(team, experience) |>
  dplyr::mutate(row_num = dplyr::row_number(), .before = 1)

exp_bottom <- meta_list |>
  dplyr::arrange(dplyr::desc(experience_rk)) |>
  dplyr::slice_head(n = 10) |>
  dplyr::select(team, experience) |>
  dplyr::mutate(row_num = dplyr::row_number(), .before = 1)
# Experience table: `exp_best` on the left (.x columns) and `exp_bottom` on
# the right (.y columns), paired through `row_num`.
exp_tbl <- local({
  # Pair the two lists, then run cbbplotR::gt_cbb_teams over each team column
  # before the gt table is created.
  paired <- dplyr::left_join(exp_best, exp_bottom, by = "row_num")
  paired <- cbbplotR::gt_cbb_teams(paired, team.x, team.x)
  paired <- cbbplotR::gt_cbb_teams(paired, team.y, team.y)

  # Base table: hide the pairing key, label the columns, and render the team
  # cells as markdown.
  tbl <- gt::gt(paired) |>
    gt::cols_hide(columns = row_num) |>
    gt::cols_label(
      team.x = "",
      experience.x = "Experience",
      team.y = "",
      experience.y = "Experience"
    ) |>
    gt::fmt_markdown(team.x) |>
    gt::fmt_markdown(team.y)

  # Colour scales: one palette and fixed domain per side.
  tbl <- tbl |>
    gt::data_color(
      columns = experience.x,
      palette = "BuGn",
      domain = c(2.8, 3.4)
    ) |>
    gt::data_color(
      columns = experience.y,
      palette = "RdPu",
      domain = c(1, 1.75)
    )

  # Theme, alignment, and a divider between the two halves.
  tbl <- tbl |>
    cbbplotR::gt_theme_athletic() |>
    gt::cols_align(align = "left", columns = c(team.x, team.y)) |>
    gtExtras::gt_add_divider(columns = team.y, sides = "left")

  # Header and source note.
  tbl <- tbl |>
    gt::tab_header(
      title = gt::html("NCAA Tournament Teams with Most <em>and</em> Least Experience"),
      subtitle = gt::html(
        "The average experience of the tournament field is <b>~2.3</b> years."
      )
    ) |>
    gt::tab_source_note(
      source_note = gt::html(
        "<hr>Experience is from kenpom.com where a freshman has zero years of experience, a sophomore has one year of experience, etc<hr>
<br><br><b>Table by Chris (@dadgumboxscores) |
data via kenpom.com, hoopR, cbbdata, cbbplotR | March 21, 2024</b>"
      )
    )

  # Text sizes for the subtitle and source note, plus a top border on every
  # body row.
  tbl |>
    gt::tab_style(
      locations = gt::cells_title(groups = "subtitle"),
      style = gt::cell_text(size = "medium")
    ) |>
    gt::tab_style(
      locations = gt::cells_source_notes(),
      style = gt::cell_text(size = "small")
    ) |>
    gt::tab_style(
      style = gt::cell_borders(
        sides = "top",
        color = "black",
        weight = gt::px(1.5),
        style = "solid"
      ),
      locations = gt::cells_body(rows = gt::everything())
    )
})
# Continuity lists for the table: the first ten rows by ascending
# continuity_rk and the first ten by descending continuity_rk. Each list gets
# a 1..n `row_num` so the two can be paired row by row later.
cont_best <- meta_list |>
  dplyr::arrange(continuity_rk) |>
  dplyr::slice_head(n = 10) |>
  dplyr::select(team, continuity) |>
  dplyr::mutate(row_num = dplyr::row_number(), .before = 1)

cont_bottom <- meta_list |>
  dplyr::arrange(dplyr::desc(continuity_rk)) |>
  dplyr::slice_head(n = 10) |>
  dplyr::select(team, continuity) |>
  dplyr::mutate(row_num = dplyr::row_number(), .before = 1)
# Continuity table: `cont_best` on the left (.x columns) and `cont_bottom` on
# the right (.y columns), paired through `row_num`.
cont_tbl <- local({
  # Pair the two lists, then run cbbplotR::gt_cbb_teams over each team column
  # before the gt table is created.
  paired <- dplyr::left_join(cont_best, cont_bottom, by = "row_num")
  paired <- cbbplotR::gt_cbb_teams(paired, team.x, team.x)
  paired <- cbbplotR::gt_cbb_teams(paired, team.y, team.y)

  # Base table: hide the pairing key, label the columns, and render the team
  # cells as markdown.
  tbl <- gt::gt(paired) |>
    gt::cols_hide(columns = row_num) |>
    gt::cols_label(
      team.x = "",
      continuity.x = "Continuity",
      team.y = "",
      continuity.y = "Continuity"
    ) |>
    gt::fmt_markdown(team.x) |>
    gt::fmt_markdown(team.y)

  # Colour scales: one palette and fixed domain per side.
  tbl <- tbl |>
    gt::data_color(
      columns = continuity.x,
      palette = "BuGn",
      domain = c(64, 83)
    ) |>
    gt::data_color(
      columns = continuity.y,
      palette = "RdPu",
      domain = c(0, 30)
    )

  # Divider between the two halves, then theme and alignment (this table
  # applies the divider before the theme).
  tbl <- tbl |>
    gtExtras::gt_add_divider(columns = team.y, sides = "left") |>
    cbbplotR::gt_theme_athletic() |>
    gt::cols_align(align = "left", columns = c(team.x, team.y))

  # Header and source note.
  tbl <- tbl |>
    gt::tab_header(
      title = gt::html("NCAA Tournament Teams with Most <em>and</em> Least Continuity"),
      subtitle = gt::html("The average continuity of the tournament field is <b>~45.3</b>.")
    ) |>
    gt::tab_source_note(
      source_note = gt::html(
        "<hr>Continuity is from kenpom.com and it measures the percentage of minutes played by same players from last season to this season.<hr>
<br><br><b>Table by Chris (@dadgumboxscores) |
data via kenpom.com, hoopR, cbbdata, cbbplotR | March 21, 2024</b>"
      )
    )

  # Text sizes for the subtitle and source note, plus a top border on every
  # body row.
  tbl |>
    gt::tab_style(
      locations = gt::cells_title(groups = "subtitle"),
      style = gt::cell_text(size = "medium")
    ) |>
    gt::tab_style(
      locations = gt::cells_source_notes(),
      style = gt::cell_text(size = "small")
    ) |>
    gt::tab_style(
      style = gt::cell_borders(
        sides = "top",
        color = "black",
        weight = gt::px(1.5),
        style = "solid"
      ),
      locations = gt::cells_body(rows = gt::everything())
    )
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment