Skip to content

Instantly share code, notes, and snippets.

@tiernanmartin
Last active June 29, 2018 15:21
Show Gist options
  • Save tiernanmartin/d953f905760bfbce5faa03ff9bcb492e to your computer and use it in GitHub Desktop.
Save tiernanmartin/d953f905760bfbce5faa03ff9bcb492e to your computer and use it in GitHub Desktop.
World Cup 2018 Draft
# SETUP -------------------------------------------------------------------
library(janitor)
library(fuzzyjoin)
library(operator.tools)
library(rvest)
library(glue)
library(googlesheets)
library(googledrive)
library(magrittr)
library(tidyverse)
first_not_na <- function (x)
{
if (all(sapply(x, is.na))) {
as(NA, class(x))
}
else {
x[!sapply(x, is.na)][1]
}
}
# LOAD DATA ---------------------------------------------------------------
games_csv_url <- "https://projects.fivethirtyeight.com/soccer-api/international/2018/wc_matches.csv"
games_raw <- games_csv_url %>%
read_csv() %>%
janitor::clean_names(case = "screaming_snake")
results_csv_url <- "https://projects.fivethirtyeight.com/soccer-api/international/2018/wc_forecasts.csv"
results_raw <- results_csv_url %>%
read_csv() %>%
janitor::clean_names(case = "screaming_snake")
draft_raw <- function(){
tibble::tribble(
~DAT,
"Isaac Leake",
"1",
"Brazil CONMEBOL",
"16",
"Switzerland UEFA",
"17",
"Russia UEFA",
"32",
"Panama CONCACAF",
"Mike Miller",
"2",
"Germany UEFA",
"15",
"Denmark UEFA",
"18",
"Senegal CAF",
"31",
"Saudi Arabia AFC",
"Cory Castagno",
"3",
"France UEFA",
"14",
"Mexico CONCACAF",
"19",
"Egypt CAF",
"30",
"Korea Republic AFC",
"Tiernan Martin",
"4",
"Argentina CONMEBOL",
"13",
"Sweden UEFA",
"20",
"Japan AFC",
"29",
"Iran AFC",
"Trevor Pendras",
"5",
"Spain UEFA",
"12",
"Colombia CONMEBOL",
"21",
"Iceland UEFA",
"28",
"Tunisia CAF",
"Miles Burnett",
"6",
"Belgium UEFA",
"11",
"Uruguay CONMEBOL",
"22",
"Serbia UEFA",
"27",
"Morocco CAF",
"Gustaf Andreasen",
"7",
"Portugal UEFA",
"10",
"Poland UEFA",
"23",
"Peru CONMEBOL",
"26",
"Costa Rica CONCACAF",
"Kyle Castagno",
"8",
"England UEFA",
"9",
"Croatia UEFA",
"24",
"Nigeria CAF",
"25",
"Australia AFC"
)
}
country_codes_url <- "https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3"
country_codes_html <- read_html(country_codes_url)
country_codes_raw <- country_codes_html %>%
html_nodes("table") %>%
magrittr::extract(2:4) %>%
html_table() %>%
reduce(bind_rows) %>%
set_colnames(c("CODE","TEAM")) %>%
transmute(CODE,
TEAM_SHORT = str_extract(TEAM,"(\\w+){1}"))
# TRANSFORM DATA ----------------------------------------------------------
draft_order <- draft_raw() %>%
mutate(GRP = ntile(row_number(),8)) %>%
group_by(GRP) %>%
transmute(DRAFT_PICK = as.integer(str_extract(DAT, "\\d+")),
TEAM = str_extract(DAT, ".+(?=\\s)"),
COACH = str_extract(first(DAT),".+(?=\\s)")) %>%
mutate(TEAM = case_when( TEAM == "Korea Republic" ~ "South Korea", TRUE ~ TEAM)) %>%
slice(-1) %>%
ungroup %>%
select(-GRP) %>%
fill(DRAFT_PICK, .direction = "down") %>%
fill(TEAM, .direction = "up") %>%
distinct()
team_codes <- games_raw %>%
gather(TEAM_NUM, TEAM, matches("TEAM")) %>%
select(TEAM) %>%
distinct() %>%
regex_left_join(country_codes_raw, by = c(TEAM = "TEAM_SHORT")) %>%
group_by(TEAM) %>%
slice(1) %>%
ungroup %>%
transmute(TEAM,
CODE = case_when(
TEAM == "Russia" ~ "RUS",
TEAM == "England" ~ "ENG",
TEAM == "South Korea" ~ "KOR",
TRUE ~ CODE),
NAMED = map2(CODE,TEAM, set_names)) %>%
pull %>%
flatten_chr
# MAKE RANKING ------------------------------------------------------------
results <- results_raw %>%
select(FORECAST_TIMESTAMP,TEAM,matches("GROUP"),matches("MAKE")) %>%
group_by(TEAM) %>%
filter(FORECAST_TIMESTAMP == max(FORECAST_TIMESTAMP)) %>%
ungroup %>%
arrange(GROUP)
games_long <- games_raw %>%
select(DATE, matches("TEAM|^SCORE")) %>%
mutate(GAME_ID = as.character(glue("{DATE}-{team_codes[TEAM1]}-{team_codes[TEAM2]}"))) %>%
gather(TEAM_NUM,TEAM_NAME, matches("TEAM")) %>%
gather(SCORE_NUM, SCORE_VAL, matches("SCORE")) %>%
arrange(DATE) %>%
mutate(TEAM_NUM = as.integer(str_extract(TEAM_NUM,"\\d+"))) %>%
group_by(DATE,GAME_ID, TEAM_NAME, TEAM_NUM) %>%
nest() %>%
mutate(GOALS_FOR = map2_int(data, TEAM_NUM, ~filter(.x, str_detect(SCORE_NUM,as.character(.y))) %>% pull(SCORE_VAL)),
GOALS_AGAINST = map2_int(data, TEAM_NUM, ~filter(.x, !str_detect(SCORE_NUM,as.character(.y))) %>% pull(SCORE_VAL))) %>%
mutate(GAME_RESULT = case_when(
GOALS_FOR - GOALS_AGAINST > 0 ~ "win",
GOALS_FOR - GOALS_AGAINST < 0 ~ "loss",
GOALS_FOR - GOALS_AGAINST == 0 ~ "tie",
TRUE ~ NA_character_
)) %>%
select(-data, -TEAM_NUM) %>%
full_join(draft_order, by = c(TEAM_NAME = "TEAM")) %>%
select(DATE,GAME_ID,COACH,DRAFT_PICK,everything()) %>%
arrange(GAME_ID)
games_long_points <- games_long %>%
mutate(STAGE = cut(DATE,
breaks = c(as.Date("2018-06-14"),as.Date("2018-06-29"),as.Date("2018-07-8"),as.Date("2018-07-12"),as.Date("2018-07-14"),as.Date("2018-07-30")),
labels = c("Group","Round_of_16","Quarterfinal","Semifinal","Final")) %>% as.character()) %>%
mutate(GAME_POINTS = case_when(
STAGE %in% "Group" & GAME_RESULT %in% "win" ~ 3,
STAGE %in% "Group" & GAME_RESULT %in% "tie" ~ 1,
STAGE %in% "Round_of_16" & GAME_RESULT %in% "win" ~ 8,
STAGE %in% "Quarterfinal" & GAME_RESULT %in% "win" ~ 10,
STAGE %in% "Semifinal" & GAME_RESULT %in% "win" ~ 12,
STAGE %in% "Final" & GAME_RESULT %in% "win" ~ 15,
GAME_RESULT %in% "loss" ~ 0,
TRUE ~ NA_real_
))
games_wide <- games_long_points %>%
group_by(TEAM_NAME) %>%
mutate(TOTAL_GOALS_FOR = sum(GOALS_FOR, na.rm = TRUE),
TOTAL_GOALS_AGAINST = sum(GOALS_AGAINST, na.rm = TRUE),
TOTAL_POINTS = sum(GAME_POINTS, na.rm = TRUE)
) %>%
ungroup %>%
mutate(GB_CONTENDER = TOTAL_GOALS_FOR == max(TOTAL_GOALS_FOR, na.rm = TRUE)) %>%
group_by(TEAM_NAME, STAGE) %>%
mutate(GAME_NUM = case_when(
STAGE == "Group" ~ cut(DATE, breaks = 3, labels = c("Game1","Game2","Game3"), right = FALSE) %>% as.character(),
TRUE ~ ""
)) %>%
mutate(GAME_STAGE = case_when(
STAGE == "Group" ~ str_c(STAGE,GAME_NUM,sep = "_"),
TRUE ~ STAGE
)) %>%
spread(GAME_STAGE, GAME_POINTS) %>%
group_by(TEAM_NAME) %>%
summarise_all(first_not_na) %>%
left_join(results, by = c(TEAM_NAME = "TEAM")) %>%
mutate(GROUP_RANK_POINTS = case_when(
GROUP_1 == 1 ~ 6,
GROUP_2 == 1 ~ 4,
TRUE ~ 0),
TOTAL_POINTS = TOTAL_POINTS + GROUP_RANK_POINTS) %>%
select(COACH, DRAFT_PICK,TEAM_NAME, GAME_NUM:Round_of_16,GROUP_RANK_POINTS,GB_CONTENDER,starts_with("TOTAL")) %>%
arrange(COACH, desc(TOTAL_POINTS))
coach_rank <- games_wide %>%
group_by(COACH) %>%
summarise(GROUP = sum(Group_Game1,Group_Game2,Group_Game3,GROUP_RANK_POINTS, na.rm = TRUE),
ROUND_OF_16 = sum(Round_of_16, na.rm = TRUE),
QUARTERFINAL = 0,
SEMIFINAL = 0,
FINAL = 0,
GOLDEN_BOOT = 0,
TOTAL = sum(TOTAL_POINTS, na.rm = TRUE)) %>%
mutate(RANK = dense_rank(desc(TOTAL)),
UPDATED = as.character(Sys.time())) %>%
select(RANK,COACH:TOTAL,UPDATED) %>%
arrange(desc(TOTAL))
# UPLOAD DATA -------------------------------------------------------------
sheet_key <- as_id("1h2pKCl0N-xhz7WBXQ5XjcTwL-nxmHUr1HWwfCo3fS7w")
dd_ss <- gs_key(sheet_key, lookup = NULL, visibility = NULL, verbose = TRUE)
upload_games_points_long <- function(){
gs_edit_cells(dd_ss,
ws = "UPLOAD_GAMES_RAW",
input = games_long_points,
byrow = FALSE,
col_names = TRUE,
trim = FALSE,
verbose = TRUE)
}
upload_games_wide <- function(){
gs_edit_cells(dd_ss,
ws = "UPLOAD_COACHES_RAW",
input = games_wide,
byrow = FALSE,
col_names = TRUE,
trim = FALSE,
verbose = TRUE)
}
upload_coach_rank <- function(){
gs_edit_cells(dd_ss,
ws = "UPLOAD_COACH_RANK_RAW",
input = coach_rank,
byrow = FALSE,
col_names = TRUE,
trim = FALSE,
verbose = TRUE)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment