Skip to content

Instantly share code, notes, and snippets.

@tiernanmartin
Last active July 12, 2018 16:23
Show Gist options
  • Save tiernanmartin/11c638ed7516ad11e9d5913503a1d866 to your computer and use it in GitHub Desktop.
Save tiernanmartin/11c638ed7516ad11e9d5913503a1d866 to your computer and use it in GitHub Desktop.
Update World Cup Coach Pool
# SETUP -------------------------------------------------------------------
library(janitor)
library(fuzzyjoin)
library(operator.tools)
library(rvest)
library(glue)
library(googlesheets)
library(googledrive)
library(magrittr)
library(jsonlite)
library(tidyverse)
first_not_na <- function (x)
{
if (all(sapply(x, is.na))) {
as(NA, class(x))
}
else {
x[!sapply(x, is.na)][1]
}
}
# LOAD DATA ---------------------------------------------------------------
json_data_url <- "https://raw.githubusercontent.com/lsv/fifa-worldcup-2018/master/data.json"
json_data <- fromJSON(json_data_url, flatten = TRUE)
draft_raw <- function(){
tibble::tribble(
~DAT,
"Isaac Leake",
"1",
"Brazil CONMEBOL",
"16",
"Switzerland UEFA",
"17",
"Russia UEFA",
"32",
"Panama CONCACAF",
"Mike Miller",
"2",
"Germany UEFA",
"15",
"Denmark UEFA",
"18",
"Senegal CAF",
"31",
"Saudi Arabia AFC",
"Cory Castagno",
"3",
"France UEFA",
"14",
"Mexico CONCACAF",
"19",
"Egypt CAF",
"30",
"Korea Republic AFC",
"Tiernan Martin",
"4",
"Argentina CONMEBOL",
"13",
"Sweden UEFA",
"20",
"Japan AFC",
"29",
"Iran AFC",
"Trevor Pendras",
"5",
"Spain UEFA",
"12",
"Colombia CONMEBOL",
"21",
"Iceland UEFA",
"28",
"Tunisia CAF",
"Miles Burnett",
"6",
"Belgium UEFA",
"11",
"Uruguay CONMEBOL",
"22",
"Serbia UEFA",
"27",
"Morocco CAF",
"Gustaf Andreasen",
"7",
"Portugal UEFA",
"10",
"Poland UEFA",
"23",
"Peru CONMEBOL",
"26",
"Costa Rica CONCACAF",
"Kyle Castagno",
"8",
"England UEFA",
"9",
"Croatia UEFA",
"24",
"Nigeria CAF",
"25",
"Australia AFC"
)
}
player_goals_url <- "https://indianexpress.com/section/fifa/stats/"
player_goals_html <- read_html(player_goals_url)
team_golden_boot <- player_goals_html %>%
html_nodes("div.points-body.clear") %>%
html_nodes("div.gl-dtl") %>%
extract2(1) %>%
html_nodes("small") %>%
html_text()
# TRANSFORM DATA ----------------------------------------------------------
teams <- tibble(TEAM_NAME = json_data$teams$name,
TEAM_ID = json_data$teams$id,
TEAM_FIFACODE = json_data$teams$fifaCode)
team_ids <- map2(teams$TEAM_NAME, teams$TEAM_ID, set_names) %>% flatten_chr()
team_codes <- map2(teams$TEAM_FIFACODE, teams$TEAM_NAME, set_names) %>% flatten_chr()
group_results <- tibble(GROUP = map_chr(json_data$groups, ~.x$name),
GROUP_WINNER = map_chr(json_data$groups, ~ team_ids[.x$winner]),
GROUP_RUNNERUP = map_chr(json_data$groups, ~ team_ids[.x$runnerup]))
games <- map_df(json_data$groups, ~ mutate(.x$matches, type = .x$name)) %>%
bind_rows(map_df(json_data$knockout, ~ mutate(.x$matches, type = .x$name))) %>%
clean_names(case = "screaming_snake") %>%
select(DATE, NAME, TYPE, matches("HOME|AWAY"), FINISHED) %>%
mutate(DATE = as.Date(DATE)) %>%
gather(COL, TEAM_ID, matches("TEAM|WINNER")) %>%
left_join(teams, by = "TEAM_ID") %>%
select(-TEAM_ID, -TEAM_FIFACODE) %>%
spread(COL, TEAM_NAME) %>%
transmute(DATE,
GAME_NUMBER = NAME,
GAME_ID = as.character(glue("{DATE}-{team_codes[HOME_TEAM]}-{team_codes[AWAY_TEAM]}")),
GAME_TYPE = TYPE,
HOME_TEAM, AWAY_TEAM, HOME_RESULT, AWAY_RESULT, HOME_PENALTY, AWAY_PENALTY, FINISHED) %>%
as_tibble
draft_order <- draft_raw() %>%
mutate(GRP = ntile(row_number(),8)) %>%
group_by(GRP) %>%
transmute(DRAFT_PICK = as.integer(str_extract(DAT, "\\d+")),
TEAM = str_extract(DAT, ".+(?=\\s)"),
COACH = str_extract(first(DAT),".+(?=\\s)")) %>%
mutate(TEAM = case_when( TEAM == "Korea Republic" ~ "South Korea", TRUE ~ TEAM)) %>%
slice(-1) %>%
ungroup %>%
select(-GRP) %>%
fill(DRAFT_PICK, .direction = "down") %>%
fill(TEAM, .direction = "up") %>%
distinct()
# MAKE RANKING ------------------------------------------------------------
get_score <- function(x, for_team = TRUE, score_type){
if(for_team){
filter(x, str_detect(SCORE_TYPE, unique(x$TEAM_STATUS)) & str_detect(SCORE_TYPE, score_type) ) %>% pull(SCORE_VALUE)
}else{
filter(x, str_detect(SCORE_TYPE, unique(x$TEAM_STATUS_OPPONENT)) & str_detect(SCORE_TYPE, score_type) ) %>% pull(SCORE_VALUE)
}
}
games_long <- games %>%
gather(TEAM_STATUS, TEAM_NAME, c(HOME_TEAM, AWAY_TEAM)) %>%
mutate(TEAM_STATUS = str_extract(TEAM_STATUS,"^.{4}")) %>%
gather(SCORE_TYPE, SCORE_VALUE, matches("HOME|AWAY")) %>%
mutate(TEAM_STATUS_OPPONENT = if_else(TEAM_STATUS %in% "HOME", "AWAY", "HOME")) %>%
group_by(GAME_TYPE, DATE, GAME_NUMBER, TEAM_NAME) %>%
nest() %>%
filter(!is.na(TEAM_NAME)) %>%
mutate(GOALS_FOR = map_int(data, get_score, score_type = "RESULT"),
GOALS_AGAINST = map_int(data, get_score, for_team = FALSE, score_type = "RESULT"),
PENALTIES_FOR = map_int(data, get_score, score_type = "PENALTY"),
PENALTIES_AGAINST = map_int(data, get_score, for_team = FALSE, score_type = "PENALTY")) %>%
mutate(GAME_RESULT = case_when(
is.na(PENALTIES_FOR) & GOALS_FOR - GOALS_AGAINST > 0 ~ "win",
is.na(PENALTIES_FOR) & GOALS_FOR - GOALS_AGAINST < 0 ~ "loss",
is.na(PENALTIES_FOR) & GOALS_FOR - GOALS_AGAINST == 0 ~ "tie",
PENALTIES_FOR > PENALTIES_AGAINST ~ "win",
PENALTIES_FOR < PENALTIES_AGAINST ~ "loss",
TRUE ~ NA_character_
)) %>%
select(-data) %>%
full_join(draft_order, by = c(TEAM_NAME = "TEAM"))
games_long_points <- games_long %>%
mutate(GAME_POINTS = case_when(
str_detect(GAME_TYPE, "Group") & GAME_RESULT %in% "win" ~ 3,
str_detect(GAME_TYPE, "Group") & GAME_RESULT %in% "tie" ~ 1,
GAME_TYPE %in% "Round of 16" & GAME_RESULT %in% "win" ~ 8,
GAME_TYPE %in% "Quarter-finals" & GAME_RESULT %in% "win" ~ 10,
GAME_TYPE %in% "Semi-finals" & GAME_RESULT %in% "win" ~ 12,
GAME_TYPE %in% "Third place play-off" & GAME_RESULT %in% "win" ~ 6,
GAME_TYPE %in% "Final" & GAME_RESULT %in% "win" ~ 15,
GAME_RESULT %in% "loss" ~ 0,
TRUE ~ NA_real_
))
games_wide <- games_long_points %>%
group_by(TEAM_NAME, GAME_TYPE) %>%
mutate(GAME_NUM = case_when(
str_detect(GAME_TYPE, "Group") ~ cut(DATE, breaks = 3, labels = c("Game1","Game2","Game3"), right = FALSE) %>% as.character(),
TRUE ~ ""
)) %>%
mutate(STAGE = case_when(
str_detect(GAME_TYPE, "Group") ~ str_c("GROUP",GAME_NUM,sep = "_"),
TRUE ~ GAME_TYPE
)) %>%
spread(STAGE, GAME_POINTS) %>%
clean_names(case = "screaming_snake") %>%
group_by(TEAM_NAME) %>%
summarise_all(first_not_na) %>%
mutate(GROUP_RESULT = case_when(
TEAM_NAME %in% group_results$GROUP_WINNER ~ "winner",
TEAM_NAME %in% group_results$GROUP_RUNNERUP ~ "runnerup",
TRUE ~ NA_character_
)) %>%
mutate(GROUP_POINTS = case_when(
GROUP_RESULT %in% "winner" ~ 6,
GROUP_RESULT %in% "runnerup" ~ 4,
TRUE ~ NA_real_
)) %>%
mutate(GOLDEN_BOOT_BONUS = case_when(
TEAM_NAME %in% team_golden_boot ~ 5,
TRUE ~ NA_real_
)) %>%
select(COACH, DRAFT_PICK,TEAM_NAME,GROUP_RESULT,GROUP_GAME1:GROUP_GAME3,GROUP_POINTS,ROUND_OF_16,QUARTER_FINALS, SEMI_FINALS, THIRD_PLACE = THIRD_PLACE_PLAY_OFF, FINAL, GOLDEN_BOOT_BONUS ) %>%
gather(COL, POINTS, GROUP_GAME1:GOLDEN_BOOT_BONUS) %>%
group_by(TEAM_NAME) %>%
mutate(TOTAL_POINTS = sum(POINTS, na.rm = TRUE)) %>%
spread(COL, POINTS) %>%
select(COACH:GROUP_RESULT, GROUP_GAME1, GROUP_GAME2, GROUP_GAME3, GROUP_POINTS, ROUND_OF_16,QUARTER_FINALS, SEMI_FINALS, THIRD_PLACE, FINAL, GOLDEN_BOOT_BONUS,TOTAL_POINTS)
coach_rank <- games_wide %>%
group_by(COACH) %>%
summarise(GROUP = sum(GROUP_GAME1,GROUP_GAME2,GROUP_GAME3, GROUP_POINTS,na.rm = TRUE),
ROUND_OF_16 = sum(ROUND_OF_16, na.rm = TRUE),
QUARTERFINAL = sum(QUARTER_FINALS, na.rm = TRUE),
SEMIFINAL = sum(SEMI_FINALS, na.rm = TRUE),
THIRD_PLACE = sum(THIRD_PLACE, na.rm = TRUE),
FINAL = sum(FINAL, na.rm = TRUE),
GOLDEN_BOOT = sum(GOLDEN_BOOT_BONUS,na.rm = TRUE),
TOTAL = sum(TOTAL_POINTS, na.rm = TRUE)) %>%
mutate(RANK = dense_rank(desc(TOTAL)),
UPDATED = as.character(Sys.time())) %>%
select(RANK,COACH:TOTAL,UPDATED) %>%
arrange(desc(TOTAL))
# UPLOAD DATA -------------------------------------------------------------
sheet_key <- as_id("1h2pKCl0N-xhz7WBXQ5XjcTwL-nxmHUr1HWwfCo3fS7w")
dd_ss <- gs_key(sheet_key, lookup = NULL, visibility = NULL, verbose = TRUE)
upload_games_points_long <- function(){
gs_edit_cells(dd_ss,
ws = "UPLOAD_GAMES_RAW",
input = games_long_points,
byrow = FALSE,
col_names = TRUE,
trim = FALSE,
verbose = TRUE)
}
upload_games_wide <- function(){
gs_edit_cells(dd_ss,
ws = "UPLOAD_COACHES_RAW",
input = games_wide,
byrow = FALSE,
col_names = TRUE,
trim = FALSE,
verbose = TRUE)
}
upload_coach_rank <- function(){
gs_edit_cells(dd_ss,
ws = "UPLOAD_COACH_RANK_RAW",
input = coach_rank,
byrow = FALSE,
col_names = TRUE,
trim = FALSE,
verbose = TRUE)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment