Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
J-League: Average Value vs. Average Age Plot
# saw these types of plots for different leagues on r/soccer
# (originally by Twitter: @fussbALEXperte)
# so thought I'd make my own version using R!
library(rvest)
library(polite)
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
library(scales)
library(ggimage)
library(stringr)
library(glue)
# Source page: the transfermarkt overview for J-League Division 1
# (saison_id 2017).
url <- "https://www.transfermarkt.com/j-league-division-1/startseite/wettbewerb/JAP1/saison_id/2017"
# Set up a polite scraping session for that URL.
session <- bow(url)
# Fetch and parse the page ONCE, then reuse the document for every field
# below instead of issuing a separate request per field.
page <- scrape(session)
# NOTE(review): scrape() appears to return NULL (with a warning) when the
# page cannot be retrieved -- fail here with a clear message rather than
# with an obscure error inside html_nodes().
if (is.null(page)) {
  stop("Could not scrape ", url, call. = FALSE)
}
# The club crest <img> elements supply both the team name (alt attribute)
# and the crest URL (src attribute), so select them a single time.
crest_nodes <- page %>%
  html_nodes("#yw1 > table > tbody > tr > td.zentriert.no-border-rechts > a > img")
# grab team name from img instead...
team_name <- crest_nodes %>%
  html_attr("alt")
# average age (still raw text at this point)
avg_age <- page %>%
  html_nodes("tbody .hide-for-pad:nth-child(5)") %>%
  html_text()
# average value (still raw text at this point)
avg_value <- page %>%
  html_nodes("tbody .rechts+ .hide-for-pad") %>%
  html_text()
# team image
team_img <- crest_nodes %>%
  html_attr("src")
# combine above into one list
resultados <- list(team_name, avg_age, avg_value, team_img)
# specify column names
col_name <- c("team", "avg_age", "avg_value", "img")
# Combine into one dataframe.
# Name the list elements and convert directly to a tibble rather than going
# through cbind() + the deprecated as_data_frame(): the columns are the same
# character vectors, but a length mismatch between the scraped vectors now
# raises an error instead of being silently recycled by cbind().
j_league_age_value_raw <- resultados %>%
  set_names(col_name) %>%
  as_tibble()
# Clean up the values
# Googled Euro-to-Yen exchange rate: 1 Euro = 129.39 Yen (8.25.18)
eur_to_jpy <- 129.39
# The plot's y axis is labelled in units of 10,000 yen.
yen_per_man <- 10000
j_league_age_value <- j_league_age_value_raw %>%
  mutate(
    # Decimal comma -> decimal point, then parse as a number.
    avg_age = avg_age %>%
      str_replace_all(",", ".") %>%
      as.numeric(),
    # Turn the "Th. <euro sign>" suffix into "000", drop the space left in
    # front of it, and parse as a number. The euro sign is written as the
    # Unicode escape \u20ac (instead of the single byte \200, which only
    # denotes the euro sign in Windows-1252) so the pattern is valid in any
    # locale; the dot is escaped so it matches a literal ".".
    # Values that do not use the "Th." suffix become NA (with a warning).
    avg_value = avg_value %>%
      str_replace_all("Th\\. \u20ac", "000") %>%
      str_replace(" ", "") %>%
      as.numeric(),
    # Swap the "/tiny/" path segment of the crest URL for "/head/".
    img = img %>% str_replace("/tiny/", "/head/")
  ) %>%
  # Convert euro to yen (rounded to whole yen), then rescale.
  mutate(
    avg_value = (avg_value * eur_to_jpy) %>% round(digits = 0),
    avg_value = avg_value / yen_per_man
  )
# Plot!
# Average age on x, average value on y; each team is drawn with the image
# found in its `img` column.
ggplot(
  data = j_league_age_value,
  mapping = aes(x = avg_age, y = avg_value)
) +
  geom_image(mapping = aes(image = img), size = 0.065) +
  scale_x_continuous(
    limits = c(24, 29),
    breaks = pretty_breaks(n = 5)
  ) +
  scale_y_continuous(
    breaks = pretty_breaks(n = 5),
    labels = comma
  ) +
  labs(
    title = "Jリーグ: 各チームの平均推定市場価格 vs. 平均年齢",
    subtitle = "",
    caption = "データ:transfermarkt.com",
    x = "平均年齢",
    y = "平均推定市場価格 (万円)"
  ) +
  theme_minimal() +
  theme(text = element_text(family = "IPAexGothic"))
# No plot object is passed, so ggsave() falls back to its default plot.
ggsave("j_league_age_value_plot.png", width = 8, height = 6)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment