Last active
April 2, 2019 14:28
-
-
Save Ryo-N7/7912a8b7160a189aa03166e5532e4e71 to your computer and use it in GitHub Desktop.
Goal Contribution Matrix (J.League: 2018 Season)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pacman::p_load(tidyverse, polite, scales, ggimage, | |
rvest, glue, extrafont, ggrepel, magick) | |
loadfonts() | |
url <- "https://www.transfermarkt.com/j1-league/startseite/wettbewerb/JAP1/plus/?saison_id=2017" | |
session <- bow(url) | |
team_links <- scrape(session) %>% | |
html_nodes("#yw1 > table > tbody > tr > td.zentriert.no-border-rechts > a") %>% | |
html_attr("href") | |
team_links_df <- team_links %>% | |
enframe(name = NULL) %>% | |
separate(value, c(NA, "team_name", NA, NA, "team_num", NA, NA), sep = "/") %>% | |
mutate(link = glue("https://www.transfermarkt.com/{team_name}/leistungsdaten/verein/{team_num}/reldata/JAP1%262017/plus/1")) | |
# tategaki function for vertical axis-text | |
tategaki_alt <- function(x){ | |
x <- stringr::str_replace_all(x, "ー", "丨") | |
stringr::str_wrap(x, width = 1) | |
} | |
# add_logo function for adding logo via magick package | |
add_logo <- function(plot_path, logo_path, logo_position, logo_scale = 10){ | |
# Requires magick R Package https://github.com/ropensci/magick | |
# Useful error message for logo position | |
if (!logo_position %in% c("top right", "top left", "bottom right", "bottom left")) { | |
stop("Error Message: Uh oh! Logo Position not recognized\n Try: logo_positon = 'top left', 'top right', 'bottom left', or 'bottom right'") | |
} | |
# read in raw images | |
plot <- magick::image_read(plot_path) | |
logo_raw <- magick::image_read(logo_path) | |
# get dimensions of plot for scaling | |
plot_height <- magick::image_info(plot)$height | |
plot_width <- magick::image_info(plot)$width | |
# default scale to 1/10th width of plot | |
# Can change with logo_scale | |
logo <- magick::image_scale(logo_raw, as.character(plot_width/logo_scale)) | |
# Get width of logo | |
logo_width <- magick::image_info(logo)$width | |
logo_height <- magick::image_info(logo)$height | |
# Set position of logo | |
# Position starts at 0,0 at top left | |
# Using 0.01 for 1% - aesthetic padding | |
if (logo_position == "top right") { | |
x_pos = plot_width - logo_width - 0.01 * plot_width | |
y_pos = 0.01 * plot_height | |
} else if (logo_position == "top left") { | |
x_pos = 0.01 * plot_width | |
y_pos = 0.01 * plot_height | |
} else if (logo_position == "bottom right") { | |
x_pos = plot_width - logo_width - 0.01 * plot_width | |
y_pos = plot_height - logo_height - 0.01 * plot_height | |
} else if (logo_position == "bottom left") { | |
x_pos = 0.01 * plot_width | |
y_pos = plot_height - logo_height - 0.01 * plot_height | |
} | |
# Compose the actual overlay | |
magick::image_composite(plot, logo, offset = paste0("+", x_pos, "+", y_pos)) | |
} | |
# BIG FUNCTION | |
jleague_stats_info <- function(link) { | |
session <- bow(link) | |
player_name_info <- scrape(session) %>% | |
html_nodes("#yw1 .bilderrahmen-fixed") %>% | |
html_attr("title") | |
num_goals_info <- scrape(session) %>% | |
html_nodes("td:nth-child(7)") %>% | |
html_text() | |
num_assists_info <- scrape(session) %>% | |
html_nodes("td:nth-child(8)") %>% | |
html_text() | |
resultados <- list(player_name_info, num_goals_info, num_assists_info) | |
col_names <- c("name", "goals", "assists") | |
jleague_stats <- resultados %>% | |
reduce(cbind) %>% | |
as_tibble() %>% | |
set_names(col_names) | |
} | |
# ALL 18 TEAMS AT ONCE, WILL TAKE A WHILE: | |
# You can also split this up to do every 3-4 teams and then `reduce(rbind)` to combine them | |
goal_contribution_df_ALL <- map2(.x = team_links_df$link, | |
.y = team_links_df$team_name, | |
~ jleague_stats_info(link = .x) %>% mutate(team = .y)) | |
goal_contribution_df <- goal_contribution_df_ALL %>% | |
reduce(rbind) | |
# CLEAN data | |
goal_contribution_clean_df <- goal_contribution_df %>% | |
mutate_at(.vars = c("goals", "assists"), | |
~str_replace(., "-", "0") %>% as.numeric) %>% | |
mutate(team = team %>% str_replace_all(., "-", " ") %>% str_to_title) %>% | |
group_by(team) %>% | |
mutate(total_goals = sum(goals), | |
total_assists = sum(assists), | |
goal_contrib = goals/total_goals, | |
assist_contrib = assists/total_goals) %>% | |
ungroup() | |
## save | |
saveRDS(goal_contribution_clean_df, | |
file = glue("{here::here()}/data/goal_contrib_clean_df.RDS")) | |
goal_contribution_clean_df %>% | |
ggplot(aes(assist_contrib, goal_contrib)) + | |
geom_point(data = goal_contribution_clean_df %>% | |
filter(goal_contrib < 0.25 | assist_contrib < 0.15), | |
color = "grey20", size = 4, alpha = 0.2) + | |
geom_point(data = goal_contribution_clean_df %>% | |
filter(goal_contrib > 0.25 | assist_contrib > 0.15), | |
color = "red", size = 4) + | |
geom_hline(yintercept = 0.25, color = "grey20", alpha = 0.4) + | |
geom_vline(xintercept = 0.15, color = "grey20", alpha = 0.4) + | |
geom_text_repel(data = goal_contribution_clean_df %>% | |
filter(goal_contrib > 0.25 | assist_contrib > 0.15), | |
aes(label = name, family = "Roboto Condensed", fontface = "bold"), | |
seed = 7, size = 5, | |
min.segment.length = 0, segment.color = "red", | |
point.padding = 0.5) + | |
scale_x_continuous(labels = percent_format(accuracy = 1), | |
breaks = c(0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3), | |
limits = c(0, 0.3)) + | |
scale_y_continuous(labels = percent_format(accuracy = 1), | |
breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5), | |
limits = c(0, 0.5)) + | |
labs(title = "得点貢献度 (チームのゴール・アシスト割合)", | |
subtitle = "J.League 2018 シーズン", | |
caption = glue(" | |
データ: transfermarkt.com | |
作: @R_by_Ryo"), | |
x = "アシスト割合", | |
y = tategaki_alt("ゴール割合")) + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed"), | |
title = element_text(size = 20), | |
plot.subtitle = element_text(size = 18), | |
plot.caption = element_text(size = 8), | |
axis.title = element_text(size = 15), | |
axis.title.y = element_text(angle = 0, vjust= 0.5), | |
axis.text = element_text(size = 14), | |
panel.grid.minor.x = element_blank()) -> goal_contribution_matrix_jp | |
ggsave(plot = goal_contribution_matrix_jp, | |
"../J-League 2018/output/goal_contribution_matrix_plot_jp.png", | |
height = 8, width = 10) | |
plot_logo <- add_logo( | |
plot_path = "../J-League 2018/output/goal_contribution_matrix_plot_jp.png", | |
logo_path = "https://upload.wikimedia.org/wikipedia/en/3/31/J.League_%28local%29.png", | |
logo_position = "top right", | |
logo_scale = 18) | |
image_write(image = plot_logo, | |
"../J-League 2018/output/goal_contribution_matrix_plot_jp_logo.png") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment