Last active
July 12, 2018 16:23
-
-
Save tiernanmartin/11c638ed7516ad11e9d5913503a1d866 to your computer and use it in GitHub Desktop.
Update World Cup Coach Pool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SETUP ------------------------------------------------------------------- | |
library(janitor) | |
library(fuzzyjoin) | |
library(operator.tools) | |
library(rvest) | |
library(glue) | |
library(googlesheets) | |
library(googledrive) | |
library(magrittr) | |
library(jsonlite) | |
library(tidyverse) | |
first_not_na <- function (x) | |
{ | |
if (all(sapply(x, is.na))) { | |
as(NA, class(x)) | |
} | |
else { | |
x[!sapply(x, is.na)][1] | |
} | |
} | |
# LOAD DATA --------------------------------------------------------------- | |
json_data_url <- "https://raw.githubusercontent.com/lsv/fifa-worldcup-2018/master/data.json" | |
json_data <- fromJSON(json_data_url, flatten = TRUE) | |
draft_raw <- function(){ | |
tibble::tribble( | |
~DAT, | |
"Isaac Leake", | |
"1", | |
"Brazil CONMEBOL", | |
"16", | |
"Switzerland UEFA", | |
"17", | |
"Russia UEFA", | |
"32", | |
"Panama CONCACAF", | |
"Mike Miller", | |
"2", | |
"Germany UEFA", | |
"15", | |
"Denmark UEFA", | |
"18", | |
"Senegal CAF", | |
"31", | |
"Saudi Arabia AFC", | |
"Cory Castagno", | |
"3", | |
"France UEFA", | |
"14", | |
"Mexico CONCACAF", | |
"19", | |
"Egypt CAF", | |
"30", | |
"Korea Republic AFC", | |
"Tiernan Martin", | |
"4", | |
"Argentina CONMEBOL", | |
"13", | |
"Sweden UEFA", | |
"20", | |
"Japan AFC", | |
"29", | |
"Iran AFC", | |
"Trevor Pendras", | |
"5", | |
"Spain UEFA", | |
"12", | |
"Colombia CONMEBOL", | |
"21", | |
"Iceland UEFA", | |
"28", | |
"Tunisia CAF", | |
"Miles Burnett", | |
"6", | |
"Belgium UEFA", | |
"11", | |
"Uruguay CONMEBOL", | |
"22", | |
"Serbia UEFA", | |
"27", | |
"Morocco CAF", | |
"Gustaf Andreasen", | |
"7", | |
"Portugal UEFA", | |
"10", | |
"Poland UEFA", | |
"23", | |
"Peru CONMEBOL", | |
"26", | |
"Costa Rica CONCACAF", | |
"Kyle Castagno", | |
"8", | |
"England UEFA", | |
"9", | |
"Croatia UEFA", | |
"24", | |
"Nigeria CAF", | |
"25", | |
"Australia AFC" | |
) | |
} | |
player_goals_url <- "https://indianexpress.com/section/fifa/stats/" | |
player_goals_html <- read_html(player_goals_url) | |
team_golden_boot <- player_goals_html %>% | |
html_nodes("div.points-body.clear") %>% | |
html_nodes("div.gl-dtl") %>% | |
extract2(1) %>% | |
html_nodes("small") %>% | |
html_text() | |
# TRANSFORM DATA ---------------------------------------------------------- | |
teams <- tibble(TEAM_NAME = json_data$teams$name, | |
TEAM_ID = json_data$teams$id, | |
TEAM_FIFACODE = json_data$teams$fifaCode) | |
team_ids <- map2(teams$TEAM_NAME, teams$TEAM_ID, set_names) %>% flatten_chr() | |
team_codes <- map2(teams$TEAM_FIFACODE, teams$TEAM_NAME, set_names) %>% flatten_chr() | |
group_results <- tibble(GROUP = map_chr(json_data$groups, ~.x$name), | |
GROUP_WINNER = map_chr(json_data$groups, ~ team_ids[.x$winner]), | |
GROUP_RUNNERUP = map_chr(json_data$groups, ~ team_ids[.x$runnerup])) | |
games <- map_df(json_data$groups, ~ mutate(.x$matches, type = .x$name)) %>% | |
bind_rows(map_df(json_data$knockout, ~ mutate(.x$matches, type = .x$name))) %>% | |
clean_names(case = "screaming_snake") %>% | |
select(DATE, NAME, TYPE, matches("HOME|AWAY"), FINISHED) %>% | |
mutate(DATE = as.Date(DATE)) %>% | |
gather(COL, TEAM_ID, matches("TEAM|WINNER")) %>% | |
left_join(teams, by = "TEAM_ID") %>% | |
select(-TEAM_ID, -TEAM_FIFACODE) %>% | |
spread(COL, TEAM_NAME) %>% | |
transmute(DATE, | |
GAME_NUMBER = NAME, | |
GAME_ID = as.character(glue("{DATE}-{team_codes[HOME_TEAM]}-{team_codes[AWAY_TEAM]}")), | |
GAME_TYPE = TYPE, | |
HOME_TEAM, AWAY_TEAM, HOME_RESULT, AWAY_RESULT, HOME_PENALTY, AWAY_PENALTY, FINISHED) %>% | |
as_tibble | |
draft_order <- draft_raw() %>% | |
mutate(GRP = ntile(row_number(),8)) %>% | |
group_by(GRP) %>% | |
transmute(DRAFT_PICK = as.integer(str_extract(DAT, "\\d+")), | |
TEAM = str_extract(DAT, ".+(?=\\s)"), | |
COACH = str_extract(first(DAT),".+(?=\\s)")) %>% | |
mutate(TEAM = case_when( TEAM == "Korea Republic" ~ "South Korea", TRUE ~ TEAM)) %>% | |
slice(-1) %>% | |
ungroup %>% | |
select(-GRP) %>% | |
fill(DRAFT_PICK, .direction = "down") %>% | |
fill(TEAM, .direction = "up") %>% | |
distinct() | |
# MAKE RANKING ------------------------------------------------------------ | |
get_score <- function(x, for_team = TRUE, score_type){ | |
if(for_team){ | |
filter(x, str_detect(SCORE_TYPE, unique(x$TEAM_STATUS)) & str_detect(SCORE_TYPE, score_type) ) %>% pull(SCORE_VALUE) | |
}else{ | |
filter(x, str_detect(SCORE_TYPE, unique(x$TEAM_STATUS_OPPONENT)) & str_detect(SCORE_TYPE, score_type) ) %>% pull(SCORE_VALUE) | |
} | |
} | |
games_long <- games %>% | |
gather(TEAM_STATUS, TEAM_NAME, c(HOME_TEAM, AWAY_TEAM)) %>% | |
mutate(TEAM_STATUS = str_extract(TEAM_STATUS,"^.{4}")) %>% | |
gather(SCORE_TYPE, SCORE_VALUE, matches("HOME|AWAY")) %>% | |
mutate(TEAM_STATUS_OPPONENT = if_else(TEAM_STATUS %in% "HOME", "AWAY", "HOME")) %>% | |
group_by(GAME_TYPE, DATE, GAME_NUMBER, TEAM_NAME) %>% | |
nest() %>% | |
filter(!is.na(TEAM_NAME)) %>% | |
mutate(GOALS_FOR = map_int(data, get_score, score_type = "RESULT"), | |
GOALS_AGAINST = map_int(data, get_score, for_team = FALSE, score_type = "RESULT"), | |
PENALTIES_FOR = map_int(data, get_score, score_type = "PENALTY"), | |
PENALTIES_AGAINST = map_int(data, get_score, for_team = FALSE, score_type = "PENALTY")) %>% | |
mutate(GAME_RESULT = case_when( | |
is.na(PENALTIES_FOR) & GOALS_FOR - GOALS_AGAINST > 0 ~ "win", | |
is.na(PENALTIES_FOR) & GOALS_FOR - GOALS_AGAINST < 0 ~ "loss", | |
is.na(PENALTIES_FOR) & GOALS_FOR - GOALS_AGAINST == 0 ~ "tie", | |
PENALTIES_FOR > PENALTIES_AGAINST ~ "win", | |
PENALTIES_FOR < PENALTIES_AGAINST ~ "loss", | |
TRUE ~ NA_character_ | |
)) %>% | |
select(-data) %>% | |
full_join(draft_order, by = c(TEAM_NAME = "TEAM")) | |
games_long_points <- games_long %>% | |
mutate(GAME_POINTS = case_when( | |
str_detect(GAME_TYPE, "Group") & GAME_RESULT %in% "win" ~ 3, | |
str_detect(GAME_TYPE, "Group") & GAME_RESULT %in% "tie" ~ 1, | |
GAME_TYPE %in% "Round of 16" & GAME_RESULT %in% "win" ~ 8, | |
GAME_TYPE %in% "Quarter-finals" & GAME_RESULT %in% "win" ~ 10, | |
GAME_TYPE %in% "Semi-finals" & GAME_RESULT %in% "win" ~ 12, | |
GAME_TYPE %in% "Third place play-off" & GAME_RESULT %in% "win" ~ 6, | |
GAME_TYPE %in% "Final" & GAME_RESULT %in% "win" ~ 15, | |
GAME_RESULT %in% "loss" ~ 0, | |
TRUE ~ NA_real_ | |
)) | |
games_wide <- games_long_points %>% | |
group_by(TEAM_NAME, GAME_TYPE) %>% | |
mutate(GAME_NUM = case_when( | |
str_detect(GAME_TYPE, "Group") ~ cut(DATE, breaks = 3, labels = c("Game1","Game2","Game3"), right = FALSE) %>% as.character(), | |
TRUE ~ "" | |
)) %>% | |
mutate(STAGE = case_when( | |
str_detect(GAME_TYPE, "Group") ~ str_c("GROUP",GAME_NUM,sep = "_"), | |
TRUE ~ GAME_TYPE | |
)) %>% | |
spread(STAGE, GAME_POINTS) %>% | |
clean_names(case = "screaming_snake") %>% | |
group_by(TEAM_NAME) %>% | |
summarise_all(first_not_na) %>% | |
mutate(GROUP_RESULT = case_when( | |
TEAM_NAME %in% group_results$GROUP_WINNER ~ "winner", | |
TEAM_NAME %in% group_results$GROUP_RUNNERUP ~ "runnerup", | |
TRUE ~ NA_character_ | |
)) %>% | |
mutate(GROUP_POINTS = case_when( | |
GROUP_RESULT %in% "winner" ~ 6, | |
GROUP_RESULT %in% "runnerup" ~ 4, | |
TRUE ~ NA_real_ | |
)) %>% | |
mutate(GOLDEN_BOOT_BONUS = case_when( | |
TEAM_NAME %in% team_golden_boot ~ 5, | |
TRUE ~ NA_real_ | |
)) %>% | |
select(COACH, DRAFT_PICK,TEAM_NAME,GROUP_RESULT,GROUP_GAME1:GROUP_GAME3,GROUP_POINTS,ROUND_OF_16,QUARTER_FINALS, SEMI_FINALS, THIRD_PLACE = THIRD_PLACE_PLAY_OFF, FINAL, GOLDEN_BOOT_BONUS ) %>% | |
gather(COL, POINTS, GROUP_GAME1:GOLDEN_BOOT_BONUS) %>% | |
group_by(TEAM_NAME) %>% | |
mutate(TOTAL_POINTS = sum(POINTS, na.rm = TRUE)) %>% | |
spread(COL, POINTS) %>% | |
select(COACH:GROUP_RESULT, GROUP_GAME1, GROUP_GAME2, GROUP_GAME3, GROUP_POINTS, ROUND_OF_16,QUARTER_FINALS, SEMI_FINALS, THIRD_PLACE, FINAL, GOLDEN_BOOT_BONUS,TOTAL_POINTS) | |
coach_rank <- games_wide %>% | |
group_by(COACH) %>% | |
summarise(GROUP = sum(GROUP_GAME1,GROUP_GAME2,GROUP_GAME3, GROUP_POINTS,na.rm = TRUE), | |
ROUND_OF_16 = sum(ROUND_OF_16, na.rm = TRUE), | |
QUARTERFINAL = sum(QUARTER_FINALS, na.rm = TRUE), | |
SEMIFINAL = sum(SEMI_FINALS, na.rm = TRUE), | |
THIRD_PLACE = sum(THIRD_PLACE, na.rm = TRUE), | |
FINAL = sum(FINAL, na.rm = TRUE), | |
GOLDEN_BOOT = sum(GOLDEN_BOOT_BONUS,na.rm = TRUE), | |
TOTAL = sum(TOTAL_POINTS, na.rm = TRUE)) %>% | |
mutate(RANK = dense_rank(desc(TOTAL)), | |
UPDATED = as.character(Sys.time())) %>% | |
select(RANK,COACH:TOTAL,UPDATED) %>% | |
arrange(desc(TOTAL)) | |
# UPLOAD DATA ------------------------------------------------------------- | |
sheet_key <- as_id("1h2pKCl0N-xhz7WBXQ5XjcTwL-nxmHUr1HWwfCo3fS7w") | |
dd_ss <- gs_key(sheet_key, lookup = NULL, visibility = NULL, verbose = TRUE) | |
upload_games_points_long <- function(){ | |
gs_edit_cells(dd_ss, | |
ws = "UPLOAD_GAMES_RAW", | |
input = games_long_points, | |
byrow = FALSE, | |
col_names = TRUE, | |
trim = FALSE, | |
verbose = TRUE) | |
} | |
upload_games_wide <- function(){ | |
gs_edit_cells(dd_ss, | |
ws = "UPLOAD_COACHES_RAW", | |
input = games_wide, | |
byrow = FALSE, | |
col_names = TRUE, | |
trim = FALSE, | |
verbose = TRUE) | |
} | |
upload_coach_rank <- function(){ | |
gs_edit_cells(dd_ss, | |
ws = "UPLOAD_COACH_RANK_RAW", | |
input = coach_rank, | |
byrow = FALSE, | |
col_names = TRUE, | |
trim = FALSE, | |
verbose = TRUE) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment