Skip to content

Instantly share code, notes, and snippets.

@mschnetzer
Last active December 7, 2022 19:11
Show Gist options
  • Save mschnetzer/407f96d4dc5773917349f2ea1c0de217 to your computer and use it in GitHub Desktop.
Save mschnetzer/407f96d4dc5773917349f2ea1c0de217 to your computer and use it in GitHub Desktop.
Branchen, Bildung, Löhne in Österreich (https://twitter.com/matschnetzer/status/1600437565332414465)
# This code is adapted from original code by @cnicault https://github.com/cnicault/tidytuesday/tree/master/2021/2021-21-ask_a_manager
library(tidyverse)
library(readxl)
library(scales)
library(ggbump)
library(glue)
library(showtext)
library(msthemes)
# Register the Google fonts used in the chart (element name = family alias
# used in the plot, value = Google font name), then route text rendering
# through showtext at 320 dpi.
c(raleway = "Raleway", roboto = "Roboto Condensed", pattaya = "Pattaya") |>
  iwalk(\(google_name, alias) font_add_google(google_name, alias))
showtext_opts(dpi = 320)
showtext_auto(enable = TRUE)
# Source data:
# http://www.statistik.at/web_de/statistiken/menschen_und_gesellschaft/soziales/personen-einkommen/verdienststruktur/index.html
# Both workbooks are read via relative paths, i.e. from the working directory.
wagedat <- read_xlsx(
  path = "bruttojahresverdienste_der_vollzeitbeschaeftigten_nach_wirtschaftstaetigke.xlsx"
)
edudat <- read_xlsx(
  path = "bruttojahresverdienste_2018_nach_merkmalen_der_beschaeftigten.xlsx",
  sheet = "Tab. 3"
)
# Wages: data editing
# Reshape the raw industry sheet into long format: one row per NACE code,
# sex (`Geschlecht`) and statistic (`Position`: Durchschnitt, P25, Median,
# P75) with the wage in `Lohn`.
wages <- wagedat |>
  magrittr::set_colnames(
    c("NACE", "Branche", "Durchschnitt", "P25", "Median", "P75", "Anzahl")
  ) |>
  # Rows whose third column reads "Insgesamt"/"Frauen"/"Männer" are treated as
  # section markers (presumably sub-table headers in the sheet -- verify
  # against the workbook); the marker is carried down to the rows below it.
  mutate(
    Geschlecht = case_when(
      Durchschnitt == "Insgesamt" ~ "Insgesamt",
      Durchschnitt == "Frauen" ~ "Frauen",
      Durchschnitt == "Männer" ~ "Männer",
      # TRUE (not the reassignable shortcut T) as the catch-all condition
      TRUE ~ NA_character_
    )
  ) |>
  fill(Geschlecht) |>
  # Keep only rows whose NACE code is a single character.
  filter(str_length(NACE) == 1) |>
  select(-Anzahl) |>
  # The statistic columns are converted to numeric before reshaping.
  mutate(across(c(Durchschnitt, P25, Median, P75), as.numeric)) |>
  pivot_longer(Durchschnitt:P75, names_to = "Position", values_to = "Lohn")
# Bin every wage into a EUR 5,000 class between 20,000 and 95,000.
# The labels are derived from the same break vector as the bins, so the two
# cannot drift apart. cut() uses right-closed intervals by default; wages
# outside the break range get NA.
wage_breaks <- seq(20000, 95000, 5000)
wage_labels <- paste0(
  "€ ", comma(head(wage_breaks, -1), big.mark = " "),
  " - ", comma(tail(wage_breaks, -1), big.mark = " ")
)
wages <- wages |>
  mutate(
    wageclass = cut(
      Lohn,
      breaks = wage_breaks,
      labels = wage_labels,
      ordered_result = TRUE
    )
  ) |>
  mutate(
    # y position of the industry: level index of the reversed NACE factor
    index_ind = as.numeric(fct_rev(NACE)),
    # y position of the wage class: its ordinal position among the classes
    index = as.numeric(wageclass),
    # one id per industry/wage-class pair, used to group the sigmoid curves
    group = glue("{NACE}-{index}")
  )
# Education: data editing
# Reshape the raw education sheet into long format for full-time employees:
# one row per education level (`Bildung`) and statistic (`Position`:
# Durchschnitt, P25, Median, P75) with the wage in `Lohn`.
edu <- edudat |>
  magrittr::set_colnames(
    c("Bildung", "Durchschnitt", "P25", "Median", "P75", "Anzahl")
  ) |>
  # Rows whose second column reads "Insgesamt"/"Vollzeit"/"Teilzeit" are
  # treated as section markers (presumably sub-table headers in the sheet --
  # verify against the workbook); the marker is carried down to later rows.
  mutate(
    Zeit = case_when(
      Durchschnitt == "Insgesamt" ~ "Insgesamt",
      Durchschnitt == "Vollzeit" ~ "Vollzeit",
      Durchschnitt == "Teilzeit" ~ "Teilzeit",
      # TRUE (not the reassignable shortcut T) as the catch-all condition
      TRUE ~ NA_character_
    )
  ) |>
  fill(Zeit) |>
  # Drop every row that still contains an NA in any column.
  drop_na() |>
  select(-Anzahl) |>
  # Full-time rows only, without the overall ("Insgesamt") education row.
  filter(Zeit == "Vollzeit", Bildung != "Insgesamt") |>
  mutate(across(c(Durchschnitt, P25, Median, P75), as.numeric)) |>
  pivot_longer(Durchschnitt:P75, names_to = "Position", values_to = "Lohn")
# Bin every wage into a EUR 5,000 class between 20,000 and 95,000.
# The labels are derived from the same break vector as the bins, so the two
# cannot drift apart. cut() uses right-closed intervals by default; wages
# outside the break range get NA.
wage_breaks <- seq(20000, 95000, 5000)
wage_labels <- paste0(
  "€ ", comma(head(wage_breaks, -1), big.mark = " "),
  " - ", comma(tail(wage_breaks, -1), big.mark = " ")
)
edu <- edu |>
  mutate(
    wageclass = cut(
      Lohn,
      breaks = wage_breaks,
      labels = wage_labels,
      ordered_result = TRUE
    )
  ) |>
  # Shorten the education labels; recode_factor() orders the factor levels in
  # the order the replacements are listed here.
  mutate(
    Bildung = recode_factor(
      Bildung,
      "Höchstens Pflichtschulabschluss" = "Pflichtschulabschluss",
      "Lehrabschluss" = "Lehrabschluss",
      "Berufsbildende mittlere Schule, Meisterprüfung" = "BMS, Meisterprüfung",
      "Allgemeinbildende höhere Schule" = "AHS",
      "Berufsbildende höhere Schule" = "BHS",
      "Kolleg, Akademie, hochschulverwandte Lehranstalt" = "Kolleg, Akademie",
      "Universität, Fachhochschule" = "Universität, FH"
    )
  ) |>
  mutate(
    # y position of the education level: its factor level index
    index_edu = as.numeric(Bildung),
    # y position of the wage class: its ordinal position among the classes
    index = as.numeric(wageclass),
    # one id per education/wage-class pair, used to group the sigmoid curves
    group_edu = glue("{Bildung}-{index}")
  )
# Mean wage by industry for women and men, widened to one column per sex.
wagegap <- wages |>
  filter(Position == "Durchschnitt", Geschlecht != "Insgesamt") |>
  select(NACE, Geschlecht, Position, Lohn) |>
  pivot_wider(names_from = Geschlecht, values_from = Lohn) |>
  mutate(
    # NOTE: despite its name, `gpg` works out to Frauen / Männer (women's mean
    # wage as a share of men's); the pay gap itself is 1 - gpg.
    gpg = 1 - (Männer - Frauen) / Männer,
    # Same y position as the industry rows: level index of the reversed NACE
    # factor.
    index_ind = as.numeric(fct_rev(NACE))
  )
# Explanatory note shown at the top of the chart, centred on the wage-class
# column.
explanation <- "Die grauen Linien zeigen das 25., 50. und 75. Perzentil der Bruttolöhne von Vollzeitbeschäftigten. Die gefärbten Linien zeigen das durchschnittliche Jahresgehalt."

# Layer inputs: the all-sex industry rows, plus the mean-only subsets that are
# drawn as coloured highlights on both sides of the chart.
wages_total <- filter(wages, Geschlecht == "Insgesamt")
wages_total_mean <- filter(wages_total, Position == "Durchschnitt")
edu_mean <- filter(edu, Position == "Durchschnitt")

ggplot(wages_total) +
  # --- Industries (left) linked to wage classes (centre) ---
  geom_segment(
    aes(x = -0.5, xend = -4.4, y = index_ind, yend = index_ind),
    color = "grey90", linetype = "dashed", linewidth = 0.2, alpha = 0.2
  ) +
  geom_label(
    aes(x = -0.5, y = index_ind, label = Branche),
    hjust = 1, color = "grey90", label.size = 0, fill = "grey30",
    size = 3, family = "raleway"
  ) +
  geom_text(
    aes(x = 2.35, y = index + 0.5, label = wageclass),
    color = "grey90", size = 3, family = "raleway", hjust = 0.5
  ) +
  # Grey curves: every statistic (mean and percentiles).
  geom_sigmoid(
    aes(
      x = -0.4, xend = 1.7, y = index_ind, yend = index + 0.5,
      group = factor(group)
    ),
    color = "grey50", linewidth = 0.2
  ) +
  # Coloured curves and end points: the mean only, coloured by wage.
  geom_sigmoid(
    data = wages_total_mean,
    aes(
      x = -0.4, xend = 1.7, y = index_ind, yend = index + 0.5,
      group = factor(group), color = Lohn
    ),
    linewidth = 0.5
  ) +
  geom_point(
    data = wages_total_mean,
    aes(x = 1.7, y = index + 0.5, color = Lohn),
    size = 1.5, inherit.aes = FALSE
  ) +
  # --- Wage classes (centre) linked to education levels (right) ---
  geom_label(
    data = edu,
    aes(x = 4.5, y = index_edu + 4, label = Bildung),
    hjust = 0, color = "grey90", label.size = 0, fill = "grey30",
    size = 3, family = "raleway"
  ) +
  geom_sigmoid(
    data = edu,
    aes(
      x = 3, xend = 4.5, y = index + 0.5, yend = index_edu + 4,
      group = factor(group_edu)
    ),
    color = "grey50", linewidth = 0.2
  ) +
  geom_sigmoid(
    data = edu_mean,
    aes(
      x = 3, xend = 4.5, y = index + 0.5, yend = index_edu + 4,
      group = factor(group_edu), color = Lohn
    ),
    linewidth = 0.5
  ) +
  geom_point(
    data = edu_mean,
    aes(x = 3, y = index + 0.5, color = Lohn),
    size = 1.5, inherit.aes = FALSE
  ) +
  # --- Gender pay gap bars (far left) ---
  # Grey bar of unit length as background, overlaid by a bar of length `gpg`;
  # the printed label is 1 - gpg.
  geom_segment(
    data = wagegap,
    aes(x = -4.5, xend = -5.5, y = index_ind, yend = index_ind),
    linewidth = 3, color = "gray80"
  ) +
  geom_segment(
    data = wagegap,
    aes(x = -4.5, xend = -4.5 - gpg, y = index_ind, yend = index_ind),
    linewidth = 3, color = "darkgoldenrod"
  ) +
  geom_text(
    data = wagegap,
    aes(x = -5.6, y = index_ind, label = percent(1 - gpg, accuracy = 1)),
    size = 2.5, color = "gray90", hjust = 1
  ) +
  # --- Annotations ---
  annotate(
    "text",
    x = -5.5, y = 15.5, label = "Gender Pay Gap",
    color = "grey90", size = 2.5, family = "raleway", hjust = 0
  ) +
  annotate(
    "text",
    x = 2.35, y = 16, label = str_wrap(explanation, 90),
    color = "grey90", hjust = 0.5, size = 2.5, family = "roboto"
  ) +
  # --- Scales and theme ---
  scale_x_continuous(limits = c(-6, 6)) +
  scale_y_continuous(limits = c(0.5, 16)) +
  scale_color_gradient(low = "darkred", high = "yellow") +
  theme_void() +
  labs(
    title = "Branchen, Bildungsabschlüsse und Löhne in Österreich",
    subtitle = "Auswertungen der Verdienststrukturerhebung 2018 für Vollzeitbeschäftigte",
    caption = "Daten: Statistik Austria. Idee: @cnicault. Grafik: @matschnetzer"
  ) +
  theme(
    plot.background = element_rect(fill = "grey30"),
    legend.position = "none",
    plot.title = element_text(
      size = 18, color = "gray90", family = "pattaya",
      margin = margin(t = 20), hjust = 0.1
    ),
    plot.subtitle = element_text(
      size = 11, color = "gray90", family = "roboto",
      margin = margin(t = 5), hjust = 0.1
    ),
    plot.caption = element_text(
      size = 8, color = "gray90", family = "roboto",
      margin = margin(b = 5), hjust = 0.98
    )
  )

# No plot object is passed, so ggsave() writes the most recently created plot.
ggsave("wages.png", width = 12, height = 6, dpi = 320)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment