Skip to content

Instantly share code, notes, and snippets.

@emordonez
Last active December 7, 2022 03:45
Show Gist options
  • Save emordonez/5fe7d760ceba4aa0c930ca6496ba1b42 to your computer and use it in GitHub Desktop.
Save emordonez/5fe7d760ceba4aa0c930ca6496ba1b42 to your computer and use it in GitHub Desktop.
Sample visualizations of European soccer transfer data from Transfermarkt, created with R and ggplot2. See the README for the source repo and data set.

Transfermarkt Visualizations

These are sample visualizations, created with R and ggplot2, of soccer transfer data web scraped from Transfermarkt. If you'd like to see the data first, see this GitHub repo. You can also download the data set directly via DownGit (although transfers-0.R already downloads it for setup). The data directory should be at the top level like so:

├── data
├── transfermarkt-visualizaitons.R
├── transfers-0.R
├── transfers-1.R
├── transfers-2.R
├── transfers-3.R
├── transfers-4.R
├── transfers-5.R
└── README.md
# Setup
source("./transfers-0.R")
# Premier League 2020/21
source("./transfers-1.R")
# Big Six spending
source("./transfers-2.R")
# Most expensive transfers
source("./transfers-3.R")
# Premier League trend
source("./transfers-4.R")
# Total league spending
source("./transfers-5.R")
#'
#' 0. Setup
#'
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
"data.table", "dplyr", "ggfittext", "ggplot2",
"Hmisc", "readr", "showtext", "treemap"
)
# Utility function to read and bind data frames
get_league_df <- function(league) {
dirs <- dir("./data", pattern = "[12][0129]\\d\\d")
files <- file.path("./data", dirs, paste0(sprintf("%s.csv", league)))
data <- lapply(files, read_csv)
return (rbindlist(data, use.names = TRUE))
}
# Utility function to save ggplots
save_image <- function(filename, plot, width = 7, height = 7) {
ggsave(
sprintf("%s.png", filename),
plot,
width = width, height = height,
units = "in", dpi = 96
)
ggsave(
sprintf("svg/%s.svg", filename),
plot,
width = width, height = height,
units = "in", dpi = 96
)
}
# Download and unzip data set
download.file(
"https://drive.google.com/uc?export=download&id=1i_UREnRvRtPDKpADQRmE6_EMgHbzF0vP",
destfile = "data.zip"
)
system("mkdir -p data && unzip data.zip -d data")
# Euro-to-pound exchange rate
eur_to_gbp <- 0.87
# ggplot theme and text settings
plot_settings <- list(
labs(
caption = "Data: Transfermarkt | github.com/emordonez"
),
theme_minimal(),
theme(
legend.position = "none",
plot.margin = margin(10, 10, 10, 10, "pt"),
text = element_text(family = "Open Sans"),
plot.title = element_text(face = "bold"),
plot.subtitle = element_text(face = "plain"),
plot.caption = element_text(face = "italic"),
axis.title.x = element_text(margin = margin(7, 0, 0, 0, "pt")),
axis.title.y = element_text(margin = margin(0, 7, 0, 0, "pt"))
)
)
font_add_google("Open Sans", "Open Sans")
showtext_auto(enable = TRUE)
#'
#' 1. Premier League 2020/21
#'
pl_2020 <- read_csv("./data/2020/premier-league.csv") %>%
filter(fee > 0) %>%
select(club, name, fee, movement)
pl_club_spending <- pl_2020 %>%
filter(movement == "in") %>%
group_by(club) %>%
dplyr::summarize(expenditure = sum(fee, na.rm = TRUE)) %>%
mutate(expenditure = eur_to_gbp * expenditure / 1e6)
pl_club_sales <- pl_2020 %>%
filter(movement == "out") %>%
group_by(club) %>%
dplyr::summarize(income = sum(fee, na.rm = TRUE)) %>%
mutate(income = eur_to_gbp * income / 1e6)
pl_club_record <- merge(pl_club_spending, pl_club_sales, all = TRUE)
pl_club_record[is.na(pl_club_record)] <- 0
pl_club_record <- mutate(pl_club_record, profit = income - expenditure)
# Clean up club names, abbreviations, and colors for presentation
pl_club_record$club <- c(
"Arsenal", "Aston Villa", "Brighton", "Burnley", "Chelsea",
"Crystal Palace", "Everton", "Fulham", "Leeds Utd", "Leicester City",
"Liverpool", "Man City", "Man Utd", "Newcastle Utd", "Sheffield Utd",
"Southampton", "Tottenham", "West Brom", "West Ham Utd", "Wolverhampton"
)
pl_club_record$club_abbrev <- c(
"ARS", # Arsenal
"AVL", # Aston Villa
"BHA", # Brighton & Hove Albion
"BUR", # Burnley
"CHE", # Chelsea
"CRY", # Crystal Palace
"EVE", # Everton
"FUL", # Fulham
"LEE", # Leeds United
"LEI", # Leicester City
"LIV", # Liverpool
"MCI", # Manchester City
"MUN", # Manchester United
"NEW", # Newcastle United
"SHU", # Sheffield United
"SOU", # Southampton
"TOT", # Tottenham Hotspur
"WBA", # West Bromwich Albion
"WHU", # West Ham United
"WOL" # Wolverhampton Wanderers
)
pl_club_record$color <- c(
"#EF0107", # Arsenal
"#670E36", # Aston Villa
"#0057B8", # Brighton & Hove Albion
"#6C1D45", # Burnley
"#034694", # Chelsea
"#1B458F", # Crystal Palace
"#003399", # Everton
"#CC0000", # Fulham
"#FFCD00", # Leeds United
"#003090", # Leicester City
"#C8102E", # Liverpool
"#6CABDD", # Manchester City
"#DA291C", # Manchester United
"#241F20", # Newcastle United
"#EE2737", # Sheffield United
"#D71920", # Southampton
"#132257", # Tottenham Hotspur
"#122F67", # West Bromwich Albion
"#7A263A", # West Ham United
"#FDB913" # Wolverhampton Wanderers
)
#'
#' PLOT 1-1: Premier League transfer spending by club
#'
total_expenditure <- sum(pl_club_record$expenditure)
viz_club_spending <- pl_club_record %>%
ggplot(aes(x = reorder(club, expenditure), y = expenditure)) +
geom_col(aes(alpha = 0.75, fill = club)) +
geom_text(
aes(label = sprintf("%0.2f", expenditure), hjust = -0.2),
size = 3.5
) +
labs(
title = "Premier League transfer spending by club",
subtitle = paste(
"Transfer spending in the 2020/21 windows totaled",
sprintf("\u00A3%0.2f billion.", total_expenditure / 1e3)
),
x = NULL,
y = "Total transfer expenditure (million \u00A3)"
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
scale_fill_manual(values = pl_club_record$color) +
plot_settings +
theme(
axis.title.x = element_text(margin = margin(5, 0, 0, 0, "pt")),
axis.line.y = element_line(color = "black", size = 0.5, linetype = 1)
) +
coord_flip()
save_image("transfers-1-1", viz_club_spending)
#'
#' PLOT 2: No Premier League club made a transfer profit in 2020/21
#'
viz_club_profits <- pl_club_record %>%
ggplot(aes(x = reorder(club_abbrev, profit))) +
geom_col(aes(y = -expenditure, alpha = 0.75, fill = club_abbrev)) +
geom_col(aes(y = income, alpha = 0.75, fill = club_abbrev)) +
geom_hline(yintercept = 0) +
geom_text(
aes(
label = sprintf("%0.0f", -expenditure),
y = -expenditure,
hjust = 1.2
),
size = 3.5
) +
geom_text(
aes(
label = sprintf("%0.0f", income),
y = income,
hjust = -0.4
),
size = 3.5
) +
labs(
title = "No Premier League club made a transfer profit in 2020/21",
subtitle = paste(
"Clubs are ordered by decreasing net transfer value, with",
"expenditures to the left \nof 0 and incomes to the right."
),
x = NULL,
y = "Net transfer value (million \u00A3)"
) +
scale_fill_manual(values = pl_club_record$color) +
plot_settings +
theme(axis.title.x = element_text(margin = margin(5, 0, 0, 0, "pt"))) +
coord_flip()
save_image("transfers-1-2", viz_club_profits)
#'
#' 2. Big Six spending
#'
big_six <- c(
"Arsenal FC", "Chelsea FC", "Liverpool FC",
"Manchester City", "Manchester United", "Tottenham Hotspur"
)
pl_transfers <- get_league_df("premier-league") %>%
filter(season >= 2010 & season < 2020)
#'
#' PLOT 2-1: League spending is catching up with the Big Six
#'
pl_transfer_history <- pl_transfers %>%
filter(movement == "in" & fee > 0) %>%
mutate(club = ifelse(club %in% big_six, club, "Rest of Premier League")) %>%
mutate(
club = case_when(
club == "Arsenal FC" ~ "Arsenal",
club == "Chelsea FC" ~ "Chelsea",
club == "Liverpool FC" ~ "Liverpool",
club == "Manchester City" ~ "Man City",
club == "Manchester United" ~ "Man Utd",
club == "Tottenham Hotspur" ~ "Tottenham",
TRUE ~ club
)
) %>%
select(club, fee, season)
# Hack to get around Tottenham spending nothing in 2018
spurs_2018 <- data.frame("Tottenham", 0, 2018)
names(spurs_2018) <- names(pl_transfer_history)
pl_transfer_history <- pl_transfer_history %>%
rbind(spurs_2018) %>%
group_by(season, club) %>%
dplyr::summarize(expenditure = sum(fee, na.rm = TRUE)) %>%
mutate(proportion = expenditure / sum(expenditure))
# Factor order to list the Big Six first
league_order <- c(
"Arsenal", "Chelsea", "Liverpool", "Man City", "Man Utd", "Tottenham",
"Rest of Premier League"
)
league_colors <- c(
"#EF0107", # Arsenal
"#034694", # Chelsea
"#C8102E", # Liverpool
"#6CABDD", # Manchester City
"#DA291C", # Manchester United
"#132257", # Tottenhambottom
"#00FF87" # Rest of Premier League
)
pl_transfer_history$club <- factor(pl_transfer_history$club, levels = league_order)
big_six_2010 <- sum(
filter(
pl_transfer_history, season == 2010 & club != "Rest of Premier League"
)$expenditure
)
expenditure_2010 <- sum(filter(pl_transfer_history, season == 2010)$expenditure)
proportion_2010 <- big_six_2010 / expenditure_2010
big_six_2019 <- sum(
filter(
pl_transfer_history, season == 2019 & club != "Rest of Premier League"
)$expenditure
)
expenditure_2019 <- sum(filter(pl_transfer_history, season == 2019)$expenditure)
proportion_2019 <- big_six_2019 / expenditure_2019
viz_pl_comparison <- pl_transfer_history %>%
ggplot(aes(x = season, y = proportion, fill = club)) +
geom_area(alpha = 0.75, size = 1) +
labs(
title = "League spending is catching up with the Big Six",
subtitle = paste(
"The Big Six accounted for",
sprintf("%0.1f%% of the \u00A3%0.0fm spent in 2010 compared to",
proportion_2010 * 100, expenditure_2010 / 1e6
),
sprintf("%0.1f%% \nof the \u00A3%0.2fb spent in 2019.",
proportion_2019 * 100, expenditure_2019 / 1e9
)
),
x = "Season",
y = "Proportion of total transfer spending"
) +
scale_x_continuous(breaks = pl_transfer_history$season) +
scale_y_continuous(expand = expansion(mult = c(0, 0))) +
scale_fill_manual(values = alpha(league_colors, 0.75)) +
plot_settings +
theme(
legend.title = element_blank(),
legend.position = "top",
legend.justification = c(0, 1),
legend.text = element_text(size = 10)
)
save_image("transfers-2-1", viz_pl_comparison)
#'
#' PLOT 2-2: Three big spenders among the Big Six
#'
big_six_transfers <- pl_transfers %>%
filter(movement == "in" & fee > 0 & club %in% big_six) %>%
mutate(
club = case_when(
club == "Arsenal FC" ~ "Arsenal",
club == "Chelsea FC" ~ "Chelsea",
club == "Liverpool FC" ~ "Liverpool",
club == "Tottenham Hotspur" ~ "Tottenham",
TRUE ~ club
)
) %>%
select(club, fee, season) %>%
rbind(spurs_2018) %>%
group_by(club, season) %>%
dplyr::summarize(expenditure = sum(fee, na.rm = TRUE)) %>%
mutate(expenditure = eur_to_gbp * expenditure / 1e6)
club_colors <- c(
"#EF0107", # Arsenal
"#034694", # Chelsea
"#C8102E", # Liverpool
"#6CABDD", # Manchester City
"#DA291C", # Manchester United
"#132257" # Tottenham
)
viz_big_six <- big_six_transfers %>%
ggplot(aes(x = season, y = expenditure, group = club)) +
geom_area(aes(alpha = 0.75, fill = club)) +
labs(
title = "Three big spenders among the Big Six",
subtitle = paste(
"The Manchester clubs and Chelsea won every league title in the",
"decade except two. \nEach spent more than \u00A31 billion over",
"20 windows."
),
x = "Season",
y = "Total transfer spending (million \u00A3)"
) +
scale_x_continuous(breaks = c(2010, 2012, 2014, 2016, 2018)) +
scale_fill_manual(values = club_colors) +
plot_settings +
theme(
text = element_text(family = "Open Sans", face = "bold"),
strip.text.x = element_text(size = 10),
axis.title = element_text(face = "plain"),
axis.text = element_text(face = "plain"),
) +
facet_wrap(~club)
save_image("transfers-2-2", viz_big_six)
#'
#' 3. Most expensive transfers
#'
pl <- get_league_df("premier-league")
buli <- get_league_df("1-bundesliga") %>% mutate(league = "Bundesliga")
laliga <- get_league_df("laliga") %>% mutate(league = "La Liga")
seriea <- get_league_df("serie-a")
ligue1 <- get_league_df("ligue-1")
transfers <- rbind(pl, buli, laliga, seriea, ligue1)
purchases <- transfers %>% filter(movement == "in" & fee > 0)
#'
#' PLOT 3-1: The 25 most ever expensive European transfers
#'
biggest_purchases <- purchases %>%
arrange(desc(fee)) %>%
slice(1:25) %>%
mutate(fee = fee / 1e6) %>%
mutate(
club = case_when(
club == "Arsenal FC" ~ "Arsenal",
club == "Chelsea FC" ~ "Chelsea",
club == "Liverpool FC" ~ "Liverpool",
club == "Manchester United" ~ "Man Utd",
club == "Manchester City" ~ "Man City",
club == "Atlético Madrid" ~ "Atl. Madrid",
club == "FC Barcelona" ~ "Barcelona",
club == "Real Madrid" ~ "R. Madrid",
club == "Paris Saint-Germain" ~ "PSG",
club == "Juventus FC" ~ "Juventus",
TRUE ~ club
)
) %>%
select(name, club, fee, season) %>%
arrange(name)
x1 <- rownames(biggest_purchases)
set_club_color <- function() {
return (case_when(
biggest_purchases$club == "R. Madrid" ~ "#00529F",
biggest_purchases$club == "Barcelona" ~ "#A50044",
TRUE ~ "grey30"
))
}
viz_biggest_purchases <- biggest_purchases %>%
ggplot(aes(x = reorder(x1, fee), y = fee)) +
geom_point(color = set_club_color()) +
geom_segment(
aes(x = x1, xend = x1, y = 0, yend = fee),
color = set_club_color()
) +
geom_text(
aes(
label = sprintf("\u20AC%.fm", fee),
y = fee,
hjust = -0.2,
fontface = "bold"
),
color = set_club_color(),
size = 3.5
) +
geom_label(
aes(
label = sprintf("%s, %d", club, season),
y = 10,
hjust = 0,
fontface = case_when(
club == "R. Madrid" ~ "bold.italic",
club == "Barcelona" ~ "bold.italic",
TRUE ~ "italic"
)
),
color = set_club_color(),
size = 3.2,
label.size = NA
) +
labs(
title = "The 25 most ever expensive European transfers",
subtitle = paste(
"Barcelona and Real Madrid have paid ten of the biggest ever",
"transfer fees. \nNeymar and Ronaldo are the only players to make",
"the top list twice."
),
x = NULL,
y = NULL
) +
scale_x_discrete(
labels = biggest_purchases$name[order(biggest_purchases$fee)]
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
plot_settings +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
) +
coord_flip()
save_image("transfers-3-1", viz_biggest_purchases)
#'
#' PLOT 3-2: Biggest ever transfers adjusted for inflation
#'
eur_inflation_index <- setNames(c(
1.68, 1.63, 1.58, 1.54, 1.50, 1.47, 1.45, 1.43, 1.41, 1.37, 1.35, 1.32,
1.29, 1.26, 1.23, 1.21, 1.17, 1.15, 1.14, 1.12, 1.09, 1.06, 1.05, 1.06,
1.05, 1.04, 1.03, 1.01, 1.00
), as.character(1992:2020))
adj_biggest_purchases <- purchases %>%
mutate(fee = fee * eur_inflation_index[as.character(season)] / 1e6) %>%
arrange(desc(fee)) %>%
slice(1:25) %>%
mutate(
club = case_when(
club == "Arsenal FC" ~ "Arsenal",
club == "Chelsea FC" ~ "Chelsea",
club == "Liverpool FC" ~ "Liverpool",
club == "Manchester United" ~ "Man Utd",
club == "Manchester City" ~ "Man City",
club == "Atlético Madrid" ~ "Atl. Madrid",
club == "FC Barcelona" ~ "Barcelona",
club == "Real Madrid" ~ "R. Madrid",
club == "Paris Saint-Germain" ~ "PSG",
club == "Juventus FC" ~ "Juventus",
TRUE ~ club
)
) %>%
select(name, club, fee, season) %>%
arrange(name)
x2 <- rownames(adj_biggest_purchases)
galacticos <- c("Zinédine Zidane", "Luís Figo")
set_rma_color <- function() {
return (ifelse(adj_biggest_purchases$name %in% galacticos, "#00529F", "grey30"))
}
set_rma_size <- function(size, default) {
return (ifelse(adj_biggest_purchases$name %in% galacticos, size, default))
}
viz_adj_biggest_purchases <- adj_biggest_purchases %>%
ggplot(aes(x = reorder(x2, fee), y = fee)) +
geom_point(
color = set_rma_color(),
size = set_rma_size(3, 1.5)
) +
geom_segment(
aes(x = x2, xend = x2, y = 0, yend = fee),
color = set_rma_color(),
size = set_rma_size(1.25, 0.5)
) +
geom_text(
aes(
label = sprintf("\u20AC%.fm", fee),
y = fee,
hjust = -0.2,
fontface = "bold"
),
color = set_rma_color(),
size = set_rma_size(4.5, 3.5)
) +
geom_label(
aes(
label = sprintf("%s, %d", club, season),
y = 10,
hjust = 0,
fontface = ifelse(
adj_biggest_purchases$name %in% galacticos, "bold.italic", "italic"
)
),
color = set_rma_color(),
size = set_rma_size(4, 3.2),
label.size = NA
) +
labs(
title = "Biggest ever transfers adjusted for inflation",
subtitle = paste(
"Two of the original Real Madrid Galácticos, Zidane and Luis",
"Figo, are \nstill among the most expensive transfers."
),
x = NULL,
y = NULL
) +
scale_x_discrete(
labels = adj_biggest_purchases$name[order(adj_biggest_purchases$fee)]
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
plot_settings +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
) +
coord_flip()
save_image("transfers-3-2", viz_adj_biggest_purchases)
#'
#' 4. Premier League trend
#'
pl <- get_league_df("premier-league")
gbp_inflation_index <- setNames(c(
2.12, 2.08, 2.03, 1.97, 1.92, 1.86, 1.80, 1.77, 1.72, 1.69, 1.66, 1.62,
1.57, 1.53, 1.48, 1.42, 1.36, 1.37, 1.31, 1.25, 1.21, 1.17, 1.14, 1.13,
1.11, 1.08, 1.04, 1.02, 1.00
), as.character(1992:2020))
#'
#' PLOT 4-1: The real growth of Premier League transfer fees
#'
pl_trend <- pl %>%
filter(movement == "in" & fee > 0) %>%
select(name, fee, season) %>%
mutate(
fee = eur_to_gbp * fee * gbp_inflation_index[as.character(season)] / 1e6
)
viz_pl_trend <- pl_trend %>%
ggplot(aes(x = season, y = fee)) +
geom_point(alpha = 0.25) +
geom_smooth(
aes(color = "Median transfer fee with IQR"),
stat = "summary", alpha = 0.25, fill = "red",
fun.data = median_hilow, fun.args = list(conf.int = 0.5)
) +
geom_line(
aes(color = "Mean transfer fee"),
stat = "summary", fun = mean, size = 1
) +
geom_line(
aes(color = "Max transfer fee"),
stat = "summary", fun = max, size = 1
) +
labs(
title = "The real growth of Premier League transfer fees",
subtitle = paste(
"With over 3,000 fees paid since 1992, the typical transfer fee is",
"being driven \nupward by more frequent purchases in the upper extreme."
),
x = NULL,
y = "Million \u00A3 (real, 2020)"
) +
scale_color_manual(values = c("black", "blue", "red")) +
plot_settings +
theme(
legend.title = element_blank(),
legend.position = c(0, 1),
legend.justification = c(0, 1),
)
save_image("transfers-4-1", viz_pl_trend)
#'
#' PLOT 4-2: Premier League transfer inflation
#'
pl_inflation <- pl %>%
filter(movement == "in" & fee > 0) %>%
select(name, fee, season) %>%
group_by(season) %>%
dplyr::summarize(mean = mean(fee), median = median(fee)) %>%
mutate(
mean = eur_to_gbp * mean / 1e6,
median = eur_to_gbp * median / 1e6
)
viz_inflation <- pl_inflation %>%
ggplot(aes(x = season)) +
geom_line(aes(y = median, color = "Median transfer fee"), size = 1) +
geom_line(aes(y = mean, color = "Mean transfer fee"), size = 1) +
annotate(
"segment",
x = 1992,
y = pl_inflation$median[1],
xend = 2020,
yend = gbp_inflation_index[as.character(1992)] * pl_inflation$median[1],
color = "black",
size = 1
) +
annotate(
"text",
x = 2013,
y = 1.75,
label = "1992 median adj. for inflation",
size = 4
) +
labs(
title = "Premier League transfer inflation",
subtitle = paste(
"Adjusted for inflation, the median transfer fee has increased by",
"715% and the mean \nby 786% since 1992. Inflation in the UK has",
"averaged 2.7% per year since then."
),
x = NULL,
y = "Million \u00A3 (real, 2020)"
) +
scale_color_manual(values = c("blue", "red")) +
plot_settings +
theme(
legend.title = element_blank(),
legend.position= c(0, 1),
legend.justification = c(0, 1)
)
save_image("transfers-4-2", viz_inflation)
#'
#' 5. Total league spending
#'
pl <- get_league_df("premier-league")
buli <- get_league_df("1-bundesliga") %>% mutate(league = "Bundesliga")
laliga <- get_league_df("laliga") %>% mutate(league = "La Liga")
seriea <- get_league_df("serie-a")
ligue1 <- get_league_df("ligue-1")
transfers <- rbind(pl, buli, laliga, seriea, ligue1)
league_totals <- transfers %>%
filter(movement == "in" & fee > 0) %>%
mutate(
club = case_when(
# Choice of big clubs for the treemap
# Premier League
club == "Arsenal FC" ~ "Arsenal",
club == "Chelsea FC" ~ "Chelsea",
club == "Liverpool FC" ~ "Liverpool",
club == "Manchester City" ~ "Man City",
club == "Manchester United" ~ "Man Utd",
club == "Tottenham Hotspur" ~ "Tottenham",
# Bundesliga
club == "Bayern Munich" ~ "Bayern",
club == "Borussia Dortmund" ~ "Dortmund",
# La Liga
club == "Atlético Madrid" ~ "Atl. Madrid",
club == "Real Madrid" ~ "R. Madrid",
club == "FC Barcelona" ~ "Barcelona",
# Serie A
club == "AC Milan" | club == "Milan AC" ~ "AC Milan",
club == "AS Roma" ~ "Roma",
club == "Inter Milan" | club == "FC Internazionale" ~ "Inter Milan",
club == "Juventus FC" ~ "Juventus",
club == "SSC Napoli" ~ "Napoli",
# Ligue 1
club == "AS Monaco" ~ "Monaco",
club == "Paris Saint-Germain" ~ "PSG",
club == "Olympique Lyon" ~ "Lyon",
club == "Olympique Marseille" ~ "Marseille",
# The rest
TRUE ~ ""
)
) %>%
# League labeled with total expenditure, only for treemap
group_by(league) %>%
mutate(
league_label = paste(
league,
sprintf("%.2fb", sum(fee, na.rm = TRUE) / 1e9), sep = "\n"
)
) %>%
select(league_label, club, fee)
# Extract data from treemap for a ggplot
# Needs to run to get the spacing right
tm <- treemap(
league_totals,
index = c("league_label", "club"),
vSize = "fee",
type = "index"
)
tm_data <- tm$tm %>%
mutate(
x1 = x0 + w,
y1 = y0 + h
) %>%
mutate(
x = (x0 + x1) / 2,
y = (y0 + y1) / 2
) %>%
mutate(primary_group = ifelse(is.na(club), 1.2, 0.5)) %>%
mutate(color = ifelse(is.na(club), NA, color))
total_expenditure <- sum(league_totals$fee)
viz_treemap <- tm_data %>%
ggplot(aes(xmin = x0, ymin = y0, xmax = x1, ymax = y1)) +
geom_rect(
aes(fill = color, size = primary_group),
show.legend = FALSE,
color = "black",
alpha = 0.75
) +
labs(
title = "Total transfer spending in Europe's top 5 leagues since 1992",
subtitle = paste(
"Clubs across these leagues have spent",
sprintf(
"\u20AC%0.2f billion over 29 seasons.",
total_expenditure / 1e9
)
),
x = NULL,
y = NULL
) +
scale_fill_identity() +
scale_size(range = range(tm_data$primary_group)) +
ggfittext::geom_fit_text(
data = filter(tm_data, club == ""),
aes(label = league_label, fontface = "bold"),
min.size = 1
) +
ggfittext::geom_fit_text(
data = filter(tm_data, club != ""),
aes(label = club),
min.size = 1
) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
plot_settings +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank()
)
save_image("transfers-5-1", viz_treemap)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment