Skip to content

Instantly share code, notes, and snippets.

@z3tt
Last active March 24, 2022 21:15
Show Gist options
  • Save z3tt/45235c5a2ee0e03e61d03f8db710e3b7 to your computer and use it in GitHub Desktop.
Save z3tt/45235c5a2ee0e03e61d03f8db710e3b7 to your computer and use it in GitHub Desktop.
Dumbbell Chart of Pay Gaps in Europe
---
title: "Gender Pay Gap: Dumbbell Plot (EU only)"
author:
  - name: "Cédric Scherer"
    url: www.cedricscherer.com
output:
  distill::distill_article:
    highlight: kate
    code_folding: false
    toc: true
    toc_depth: 2
---
```{r setup, include=FALSE}
# Chunk defaults for the whole document: echo the code, hide messages and
# warnings, and render figures with the "ragg_png" device at 350 dpi.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  dev = "ragg_png",
  dpi = 350,
  retina = 1
)
```
# Setup
```{r packages}
library(tidyverse)
library(here)
library(janitor)
library(ggrepel)
library(ggtext)
library(colorspace)
library(pdftools)
```
# Data
```{r data}
# Read the pay gap table from the project root.
paygap_raw <- read_csv(here::here("sdg_05_20_linear.csv"))

# Keep country code, year and pay gap value; drop CH, NO and IS (the plot is
# EU only) and shorten every code containing "EU27_" to "EU27".
paygap <-
  paygap_raw %>%
  janitor::clean_names() %>%
  dplyr::select(country = geo, year = time_period, gap = obs_value) %>%
  filter(!country %in% c("CH", "NO", "IS")) %>%
  mutate(country = if_else(str_detect(country, "EU27_"), "EU27", country))

# One row per country comparing 2020 ("today") with the mean of all years up
# to and including 2010 ("reference").
paygap_comparison <-
  paygap %>%
  filter(year == 2020 | year <= 2010) %>%
  mutate(
    period = if_else(year == 2020, "today", "reference"),
    # gaps are negated so that they are drawn below the zero line
    gap = -gap
  ) %>%
  group_by(country, period) %>%
  summarize(
    # Count only years that actually contribute to the mean. `n()` would also
    # count rows with a missing value, overstating the number of reference
    # years that the point size encodes. Must be computed before `gap` is
    # overwritten by its mean.
    n = sum(!is.na(gap)),
    gap = mean(gap, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  pivot_wider(
    id_cols = c(country),
    names_from = period,
    values_from = c(gap, n)
  ) %>%
  # countries without a 2020 value cannot be compared
  filter(!is.na(gap_today)) %>%
  dplyr::select(-n_today) %>%
  mutate(
    # aggregates (codes matching "EU" or "EA") get a medium-weight label
    country = if_else(
      str_detect(country, "EU|EA"),
      paste0("<b style='font-family:Compagnon-Medium;'>", country, "</b>"),
      country
    ),
    country_today = fct_reorder(country, gap_today),
    country_ref = fct_reorder(country, gap_reference),
    # gaps are negative here, so a positive change means the gap shrank
    change = gap_today - gap_reference,
    trend = case_when(
      change < -.5 ~ "Larger pay gap",
      change > .5 ~ "Smaller pay gap",
      TRUE ~ "Stable pay gap"
    ),
    trend = factor(
      trend,
      levels = c("Larger pay gap", "Stable pay gap", "Smaller pay gap")
    ),
    # vjust of the country label (v1) and of the percentage label (v2) in the
    # dumbbell plot; flipped depending on the direction of change
    v1 = if_else(change > 0, 1.45, -.45),
    v2 = if_else(change > 0, -.25, 1.25),
    # rich-text label: 2020 gap on the first line, signed change below it
    percent_lab = paste0(
      scales::percent(abs(gap_today), scale = 1, accuracy = .1, trim = FALSE),
      "<br><span style='font-size:8.2pt;font-family:Compagnon-Roman;'>",
      if_else(
        change > 0,
        paste0("–", sprintf("%2.1f", abs(change)), "%"),
        paste0("+", sprintf("%2.1f", abs(change)), "%")
      ),
      "</span>"
    )
  )
```
```{r dumbbell-graph, fig.height=12, fig.width=16}
# x positions (factor level indices) of the aggregate rows, i.e. those whose
# label matches "EU|EA".
# NOTE(review): the annotations below hard-code two aggregates, with the euro
# area at pos[1] and the EU27 at pos[2] — confirm against the data.
pos <- as.numeric(
  paygap_comparison$country_today[
    which(str_detect(paygap_comparison$country_today, "EU|EA"))
  ]
)

ggplot(
  paygap_comparison,
  aes(x = as.numeric(country_today), y = gap_reference, color = trend)
) +
  ## reference groups ----------------------------------------------------------
  # shaded columns behind the aggregate rows
  annotate(
    geom = "rect",
    xmin = pos - .55, xmax = pos + .55,
    ymin = -30, ymax = -1.5,
    fill = "grey30", alpha = c(.05, .075)
  ) +
  annotate(
    geom = "text",
    x = c(pos[1] - .75, pos[2] + .75), y = -28.2,
    label = c(
      "The 19 European Union countries that\nuse the euro as the official currency",
      "The 27 European Union countries\n(excluding the UK that left the EU)"
    ),
    hjust = c(1, 0), color = "grey50", family = "Compagnon-Roman",
    size = 3.5, lineheight = .85
  ) +
  ## y grid --------------------------------------------------------------------
  geom_hline(
    yintercept = -1:-5*5,
    color = "grey80", linetype = "42", size = .3
  ) +
  geom_hline(
    yintercept = c(0, -30),
    color = "grey80", size = .6
  ) +
  # white patch covering the zero line to the right of x = 26.5
  annotate(
    geom = "rect",
    xmin = 26.5, xmax = Inf,
    ymin = .5, ymax = -.5,
    fill = "white"
  ) +
  ## y-axis labels -------------------------------------------------------------
  annotate(
    geom = "text",
    x = -1, y = -1:-5*5,
    label = scales::percent(1:5*5, scale = 1, accuracy = 1, trim = FALSE),
    hjust = 0, vjust = 1.7, color = "grey30", family = "Compagnon-Light",
    size = 6.7
  ) +
  annotate(
    geom = "text",
    x = -1, y = c(1.7, 0, -30),
    label = c(
      "Women's gross hourly earnings were...",
      "equal to those of male paid employees in the EU",
      "30% below those of male paid employees in the EU"
    ),
    hjust = 0, vjust = 1.7, color = "#212121", family = "Compagnon-Roman",
    size = 6.7
  ) +
  ## legend --------------------------------------------------------------------
  # hand-drawn key explaining the dumbbell encoding
  annotate(
    geom = "segment",
    x = 6.5, xend = 6.5, y = -8.5, yend = -6, size = .8,
    arrow = arrow(type = "closed", length = unit(2.3, "mm")),
    color = "grey60"
  ) +
  annotate(
    geom = "point",
    x = 6.5, y = -8.5, size = 2.7,
    color = "grey60"
  ) +
  annotate(
    geom = "text",
    x = 6.75, y = c(-6, -8.1, -9),
    label = c(
      "Pay gap 2020",
      "Pay gap 2002–10",
      "Size encodes number\nof reference years"
    ),
    hjust = 0, color = "grey30", family = "Compagnon-Roman",
    size = c(4, 4, 3.3), lineheight = .7
  ) +
  ## dumbbell labels -----------------------------------------------------------
  # stage() maps `trend` to colour and darkens the resulting colour after
  # scaling; it replaces a duplicated `color = ..., color = after_scale(...)`
  # aesthetic, which only works by accident and triggers a warning.
  ggtext::geom_richtext(
    aes(
      label = country, vjust = v1,
      color = stage(trend, after_scale = darken(color, .2))
    ),
    family = "Compagnon-Roman", size = 5.3,
    fill = NA, label.color = NA,
    label.padding = unit(rep(.1, 4), "lines"), show.legend = FALSE
  ) +
  ggtext::geom_richtext(
    aes(
      label = percent_lab, y = gap_today, vjust = v2,
      color = stage(trend, after_scale = darken(color, .2))
    ),
    family = "Compagnon-Medium", size = 3.3, lineheight = .85,
    fill = NA, label.color = NA,
    label.padding = unit(rep(.1, 4), "lines"), show.legend = FALSE
  ) +
  ## dumbbell ------------------------------------------------------------------
  # start point at the reference gap, sized by the number of reference years;
  # alpha is driven by whether the trend is "Stable pay gap"
  geom_point(
    aes(alpha = trend == "Stable pay gap", size = n_reference),
    #size = 2,
    show.legend = FALSE
  ) +
  # arrow from the reference gap to the 2020 gap
  geom_segment(
    aes(xend = as.numeric(country_today), yend = gap_today), size = 1.1,
    arrow = arrow(type = "closed", length = unit(2.3, "mm"))
  ) +
  ## coord + scales ------------------------------------------------------------
  coord_cartesian(expand = FALSE, clip = "off") +
  scale_x_continuous(limits = c(-1, 27.5), guide = "none") +
  scale_y_continuous(breaks = 0:-6*5, limits = c(-31, 1.7)) +
  scale_color_manual(
    values = c("#B55951", "#F0B48E", "#669d8f"),
    name = NULL
  ) +
  scale_alpha_manual(values = c(1, 0)) +
  scale_size_area(max_size = 3.5) +
  ## styling -------------------------------------------------------------------
  theme_minimal(base_size = 16, base_family = "Compagnon-Roman") +
  theme(
    panel.grid = element_blank(),
    axis.ticks.length.x = unit(.4, "lines"),
    axis.ticks.x.top = element_line(color = "grey80", size = .6),
    # The x axis is not drawn (guide = "none"), so this element has no visible
    # effect; the former vectorised `hjust` is not supported by element_text()
    # and was dropped.
    axis.text.x.top = element_text(color = "grey30"),
    axis.text.y = element_blank(),
    plot.caption = element_textbox_simple(
      family = "Compagnon-Light", size = 13.3,
      color = "grey30", lineheight = 1.5,
      margin = margin(t = 50, b = 0), hjust = 0
    ),
    legend.position = c(.16, .73),
    legend.text = element_text(color = "grey30", size = 11.5),
    legend.key.width = unit(2.2, "lines"),
    plot.margin = margin(40, 40, 30, 40)
  ) +
  labs(
    x = NULL, y = NULL,
    caption = "<b style='color:#212121;font-family:Compagnon-Roman;font-size:19.2pt;'>In 2020, women's gross hourly earnings were on average 13% below those of men in the EU.</b><br>Many countries were able to <b style='color:#577A71;font-family:Compagnon-Roman;'>decrease the pay gap</b> between male and female employees: most notably <b style='color:#577A71;font-family:Compagnon-Roman;'>Luxembourg</b> with the smallest gap in 2020 of 0.7% and <b style='color:#577A71;font-family:Compagnon-Roman;'>Romania</b> with a decrease of 13.6% within 18 years to 2.4%, ranking second. However, in several countries the pay gap was almost <b style='color:#C38B64;font-family:Compagnon-Roman;'>stable (Hungary and France)</b> or even <b style='color:#854D49;font-family:Compagnon-Roman;'>increased (Latvia, Portugal, Croatia, and Malta)</b> compared to the gap in the reference period from 2002 to 2010 for all years available via Eurostat.<br><br>As an unadjusted indicator, the gender pay gap gives an overall picture of the differences between men and women in terms of earnings and measures — a concept which is broader than discrimination in the sense of: <i style='color:#577A71;font-family:Compagnon-Bold;font-size:15.7pt;'>equal pay for work of equal value.</i><br><br>Graphic: Cédric Scherer • Data: Eurostat (SDG_05_20; no data for Greece and Ireland) • #InternationalWomensDay2022"
  )

# Save the last plot as a vector PDF, then rasterise that PDF to PNG.
ggsave("paygap_eu_dumbbell.pdf", width = 16, height = 12, device = cairo_pdf)

# The PNG goes into ./plots; create the folder first so the conversion does
# not fail on a fresh checkout.
dir.create("./plots", showWarnings = FALSE, recursive = TRUE)
pdf_convert(
  "paygap_eu_dumbbell.pdf",
  filenames = "./plots/paygap_eu_dumbbell.png",
  dpi = 700
)
```
***
<details><summary>Session Info</summary>
```{r sessionInfo}
# Print the current time and the session details for reproducibility.
Sys.time()
sessionInfo()
```
</details>
---
title: "Gender Pay Gap: Slope Plot (EU only)"
author:
  - name: "Cédric Scherer"
    url: www.cedricscherer.com
output:
  distill::distill_article:
    highlight: kate
    code_folding: false
    toc: true
    toc_depth: 2
---
```{r setup, include=FALSE}
# Chunk defaults for the whole document: echo the code, hide messages and
# warnings, and render figures with the "ragg_png" device at 350 dpi.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  dev = "ragg_png",
  dpi = 350,
  retina = 1
)
```
# Setup
```{r packages}
library(tidyverse)
library(here)
library(janitor)
library(ggrepel)
library(ggtext)
library(colorspace)
library(pdftools)
```
# Data
```{r data}
# Read the pay gap table from the project root.
paygap_raw <- read_csv(here::here("sdg_05_20_linear.csv"))

# Keep country code, year and pay gap value; drop the EU28 and EA19 aggregates
# as well as CH, NO and IS, and shorten every code containing "EU27_" to
# "EU27".
paygap <-
  paygap_raw %>%
  janitor::clean_names() %>%
  dplyr::select(country = geo, year = time_period, gap = obs_value) %>%
  filter(!country %in% c("EU28", "EA19", "CH", "NO", "IS")) %>%
  mutate(country = if_else(str_detect(country, "EU27_"), "EU27", country))

# One row per country comparing the 2020 gap with the earliest available gap.
paygap_comparison <-
  paygap %>%
  # Drop rows without a value before looking for the earliest year. Otherwise
  # a country whose first listed year has a missing value ends up without any
  # reference value and is removed further down.
  filter(!is.na(gap)) %>%
  group_by(country) %>%
  filter(year == 2020 | year == min(year)) %>%
  # wide: one gap_<year> column per year that occurs ...
  pivot_wider(
    id_cols = c(country),
    names_from = year,
    values_from = gap,
    names_prefix = "gap_"
  ) %>%
  # ... and back to long for every year except 2020, so that each row pairs
  # gap_2020 with one candidate reference year
  pivot_longer(
    cols = -c(country, gap_2020),
    names_to = "year_ref",
    values_to = "gap_ref",
    names_prefix = "gap_"
  ) %>%
  ungroup() %>%
  # keeps only the reference year that belongs to the country and drops
  # countries without a 2020 value
  filter(!is.na(gap_ref), !is.na(gap_2020)) %>%
  mutate(
    year_ref = as.numeric(year_ref),
    # gaps are not negated here, so a positive change means the gap grew
    change = gap_2020 - gap_ref,
    trend = case_when(
      country == "EU27" ~ "European Union",
      change > .5 ~ "Larger pay gap",
      change < -.5 ~ "Smaller pay gap",
      TRUE ~ "Stable pay gap"
    ),
    trend = factor(
      trend,
      levels = c(
        "Larger pay gap", "Stable pay gap", "Smaller pay gap", "European Union"
      )
    ),
    country = if_else(country == "EU27", "EU", country),
    # label shown next to the 2020 value: country code plus 2020 gap
    country_lab = paste(
      country,
      scales::percent(gap_2020, scale = 1, accuracy = .1, trim = FALSE)
    )
  ) %>%
  # reverse level order: "European Union" rows first, "Larger pay gap" last
  arrange(desc(trend))
```
```{r slope-graph, fig.height=14, fig.width=10}
# Slope chart: one arrow per country from its earliest gap (at year_ref) to
# its 2020 gap. Gaps are negated so that they are drawn below the zero line.
ggplot(paygap_comparison, aes(x = year_ref, y = -gap_ref, color = trend)) +
  ## reference period ----------------------------------------------------------
  annotate(
    geom = "rect",
    xmin = 2001.7, xmax = 2010.3,
    ymin = -30, ymax = -1.2,
    fill = "grey30", alpha = .05
  ) +
  annotate(
    geom = "text",
    x = 2006, y = -27.5,
    label = "Earliest information\non gender pay gaps",
    family = "Compagnon-Light",
    color = "grey17", size = 3.9,
    lineheight = .85
  ) +
  annotate(
    geom = "segment",
    x = 2001.7, xend = 2010.3,
    y = -26.7, yend = -26.7,
    color = "grey70", size = .4,
    # `length` is now passed by name instead of by position
    arrow = arrow(
      type = "closed", ends = "both", length = unit(1.6, "mm"), angle = 50
    )
  ) +
  ## y grid --------------------------------------------------------------------
  # the -5 to -20 lines stop at 2020.4; the -25, -30 and zero lines run on to
  # 2023.5
  annotate(
    geom = "segment",
    x = 2000.6, xend = 2020.4,
    y = -1:-4*5, yend = -1:-4*5,
    color = "grey80", linetype = "42", size = .3
  ) +
  annotate(
    geom = "segment",
    x = 2000.6, xend = 2023.5,
    y = -5:-6*5, yend = -5:-6*5,
    color = "grey80", linetype = "42", size = .3
  ) +
  annotate(
    geom = "segment",
    x = 2000.6, xend = 2023.5,
    y = 0, yend = 0,
    color = "grey80", size = .6
  ) +
  annotate(
    geom = "segment",
    x = 2020, xend = 2020,
    y = 1, yend = 0,
    color = "grey80", size = .6
  ) +
  ## annotation bubbles --------------------------------------------------------
  annotate(
    geom = "segment",
    x = 2020.2, xend = 2020.2,
    y = -22, yend = -23.2,
    color = "grey80", size = .3
  ) +
  annotate(
    geom = "text",
    x = 2020.2, y = -23.9,
    label = "Area equals\nabsolute change",
    family = "Compagnon-Light",
    color = "grey30", size = 3.9,
    lineheight = .85
  ) +
  ## y-axis labels -------------------------------------------------------------
  annotate(
    geom = "text",
    x = 2000.6, y = -1:-5*5,
    label = scales::percent(1:5*5, scale = 1, accuracy = 1, trim = FALSE),
    hjust = 0, vjust = 1.7, color = "grey30", family = "Compagnon-Light",
    size = 4.8
  ) +
  annotate(
    geom = "text",
    x = 2000.6, y = c(1, 0, -30),
    label = c(
      "Women's gross hourly earnings were...",
      "equal to those of male paid employees in the EU",
      "30% below those of male paid employees in the EU"
    ),
    hjust = 0, vjust = 1.7, color = "#212121", family = "Compagnon-Roman",
    size = 4.8
  ) +
  ## annotation countries ------------------------------------------------------
  # stage() maps `trend` to colour and darkens the resulting colour after
  # scaling; it replaces a duplicated `color = ..., color = after_scale(...)`
  # aesthetic, which only works by accident and triggers a warning.
  geom_text_repel(
    aes(
      x = 2020.2, y = -gap_2020,
      label = country_lab,
      color = stage(trend, after_scale = darken(color, .2))
    ),
    family = "Compagnon-Roman", size = 3.8, hjust = 0,
    xlim = c(2021.5, NA), direction = "y", max.overlaps = 20,
    segment.size = .3, segment.alpha = .33, #segment.linetype = "13",
    segment.curvature = .05, segment.ncp = 3, segment.angle = 10,
    # NOTE(review): ggrepel documents this option as `segment.inflect`, so
    # `inflect` is presumably ignored — confirm before renaming, because
    # enabling it would change the rendered connector curves.
    inflect = TRUE, box.padding = .2, show.legend = FALSE
  ) +
  ## slope -------------------------------------------------------------------
  # 2020 bubbles: a white disc with a coloured outline on top, both sized by
  # the absolute change
  geom_point(
    aes(x = 2020.2, y = -gap_2020, size = abs(change)),
    color = "white", show.legend = FALSE
  ) +
  geom_point(
    aes(x = 2020.2, y = -gap_2020, size = abs(change)),
    shape = 1, stroke = .6, show.legend = FALSE
  ) +
  # arrows are drawn per trend so that each group gets its own line width and
  # arrow head size
  geom_segment(
    data = filter(paygap_comparison, trend == "Smaller pay gap"),
    aes(xend = 2019.8, yend = -gap_2020), size = .3,
    arrow = arrow(type = "closed", length = unit(1.3, "mm"))
  ) +
  geom_segment(
    data = filter(
      paygap_comparison,
      trend %in% c("Larger pay gap", "Stable pay gap")
    ),
    aes(xend = 2019.8, yend = -gap_2020), size = .6,
    arrow = arrow(type = "closed", length = unit(2.5, "mm"))
  ) +
  geom_segment(
    data = filter(paygap_comparison, trend == "European Union"),
    aes(xend = 2019.8, yend = -gap_2020), size = .6,
    arrow = arrow(type = "closed", length = unit(1.7, "mm"))
  ) +
  # start points at the reference year
  geom_point(show.legend = FALSE) +
  ## coord + scales ------------------------------------------------------------
  coord_cartesian(expand = FALSE, clip = "off") +
  scale_x_continuous(
    breaks = c(sort(unique(paygap_comparison$year_ref)), 2020),
    limits = c(2000.6, 2023.5), position = "top"
  ) +
  scale_y_continuous(breaks = 0:-6*5, limits = c(-31, 1)) +
  scale_color_manual(
    values = c("#B55951", "#F0B48E", "#91B9AE", "#212121"),
    name = NULL
  ) +
  scale_size_area(max_size = 7.5) +
  ## styling -------------------------------------------------------------------
  theme_minimal(base_size = 15, base_family = "Compagnon-Roman") +
  theme(
    panel.grid = element_blank(),
    axis.ticks.length.x = unit(.4, "lines"),
    axis.ticks.x.top = element_line(color = "grey80", size = .6),
    axis.text.x.top = element_text(color = "grey30"),
    axis.text.y = element_blank(),
    plot.caption = element_textbox_simple(
      family = "Compagnon-Light", size = 11.5,
      color = "grey30", lineheight = 1.4,
      margin = margin(t = 35, b = 5), hjust = 0
    ),
    legend.position = c(.87, .12),
    legend.text = element_text(
      color = "grey30", family = "Compagnon-Light", size = 11.5
    ),
    legend.key.width = unit(1.8, "lines"),
    plot.margin = margin(40, 40, 30, 40)
  ) +
  labs(
    x = NULL, y = NULL,
    caption = "<b style='color:#212121;font-family:Compagnon-Roman;'>In 2020, women's gross hourly earnings were on average 13% below those of men in the EU.</b><br>Many countries were able to <b style='color:#6E9288;font-family:Compagnon-Roman;'>decrease the pay gap</b> between male and female employees: most notably <b style='color:#6E9288;font-family:Compagnon-Roman;'>Luxembourg</b> with the smallest gap in 2020 of 0.7% and <b style='color:#6E9288;font-family:Compagnon-Roman;'>Romania</b> with a decrease of 13.6% within 18 years to 2.4%, ranking second. However, in several countries the pay gap was almost <b style='color:#C38B64;font-family:Compagnon-Roman;'>stable (France, Lithuania, and Italy)</b> or even <b style='color:#854D49;font-family:Compagnon-Roman;'>increased (Latvia, Portugal, Croatia, and Malta)</b> compared to the earliest reference value listed by Eurostat.<br><br>As an unadjusted indicator, the gender pay gap gives an overall picture of the differences between men and women in terms of earnings and measures — a concept which is broader than discrimination in the sense of: <i style='color:#6E9288;font-family:Compagnon-Bold;'>equal pay for work of equal value.</i><br><br><b style='font-size:8.4pt;'>Graphic: Cédric Scherer • Data: Eurostat (SDG_05_20; no data for Greece and Ireland) • #InternationalWomensDay2022</b>"
  )

# Save the last plot as a vector PDF, then rasterise that PDF to PNG.
ggsave("paygap_eu_slope.pdf", width = 10, height = 14, device = cairo_pdf)
pdf_convert("paygap_eu_slope.pdf", filenames = "paygap_eu_slope.png", dpi = 700)
```
***
<details><summary>Session Info</summary>
```{r sessionInfo}
# Print the current time and the session details for reproducibility.
Sys.time()
sessionInfo()
```
</details>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment