# Branchen, Bildung, Löhne in Österreich (https://twitter.com/matschnetzer/status/1600437565332414465)
# Gist: mschnetzer/407f96d4dc5773917349f2ea1c0de217 (last active December 7, 2022 19:11)
# This code is adapted from original code by @cnicault: https://github.com/cnicault/tidytuesday/tree/master/2021/2021-21-ask_a_manager
# Packages ----
library(tidyverse)
library(readxl)
library(scales)
library(ggbump)
library(glue)
library(showtext)
library(msthemes)

# Fonts ----
# Register the Google fonts under the short family names that the plot code
# refers to ("raleway", "roboto", "pattaya").
font_add_google("Raleway", "raleway")
font_add_google("Roboto Condensed", "roboto")
font_add_google("Pattaya", "pattaya")

# Let showtext render all text, at 320 dpi (the same dpi value that is passed
# to ggsave() when the figure is written).
showtext_opts(dpi = 320)
showtext_auto(enable = TRUE)
# Get data: http://www.statistik.at/web_de/statistiken/menschen_und_gesellschaft/soziales/personen-einkommen/verdienststruktur/index.html
# Both workbooks are expected in the current working directory.
# `wagedat`: earnings by economic activity (first sheet of the workbook).
wagedat <- read_xlsx(
  "bruttojahresverdienste_der_vollzeitbeschaeftigten_nach_wirtschaftstaetigke.xlsx"
)
# `edudat`: earnings by employee characteristics, sheet "Tab. 3".
edudat <- read_xlsx(
  "bruttojahresverdienste_2018_nach_merkmalen_der_beschaeftigten.xlsx",
  sheet = "Tab. 3"
)
# Wages: Data editing ----
# Reshape the raw sheet into long format: one row per
# NACE section x Geschlecht x Position (Durchschnitt, P25, Median, P75).
wages <- wagedat |>
  magrittr::set_colnames(
    c("NACE", "Branche", "Durchschnitt", "P25", "Median", "P75", "Anzahl")
  ) |>
  # Rows whose "Durchschnitt" cell reads Insgesamt/Frauen/Männer are used as
  # markers (presumably block headers in the sheet -- verify against the
  # workbook); the marker is copied to `Geschlecht` and carried downwards.
  mutate(
    Geschlecht = case_when(
      Durchschnitt == "Insgesamt" ~ "Insgesamt",
      Durchschnitt == "Frauen" ~ "Frauen",
      Durchschnitt == "Männer" ~ "Männer",
      TRUE ~ NA_character_
    )
  ) |>
  fill(Geschlecht) |>
  # Keep only rows whose NACE code is exactly one character long.
  filter(str_length(NACE) == 1) |>
  select(-Anzahl) |>
  # The value columns are converted explicitly because they are compared
  # against text markers above.
  mutate(across(c(Durchschnitt, P25, Median, P75), as.numeric)) |>
  pivot_longer(Durchschnitt:P75, names_to = "Position", values_to = "Lohn")
# Bin every wage into a 5 000-wide class between 20 000 and 95 000 and derive
# the plotting positions for the sector side of the chart.
wages <- wages |>
  mutate(
    # cut() uses right-closed intervals by default, i.e. (20000, 25000], ...;
    # values outside (20000, 95000] become NA.
    wageclass = cut(
      Lohn,
      breaks = seq(20000, 95000, 5000),
      labels = paste0(
        "€ ", comma(seq(20000, 90000, 5000), big.mark = " "),
        " - ", comma(seq(25000, 95000, 5000), big.mark = " ")
      ),
      ordered_result = TRUE
    )
  ) |>
  mutate(
    # Position of the sector label: level order of NACE, reversed.
    index_ind = as.numeric(fct_rev(NACE)),
    # Position of the wage class: its rank among the ordered classes.
    index = as.numeric(wageclass),
    # One key per sector/wage-class pair, used to group the connecting curves.
    group = glue("{NACE}-{index}")
  )
# Education: Data editing ----
# Reshape the raw sheet into long format: one row per
# Bildung x Position (Durchschnitt, P25, Median, P75), full-time only.
edu <- edudat |>
  magrittr::set_colnames(
    c("Bildung", "Durchschnitt", "P25", "Median", "P75", "Anzahl")
  ) |>
  # Rows whose "Durchschnitt" cell reads Insgesamt/Vollzeit/Teilzeit are used
  # as markers (presumably block headers in the sheet -- verify against the
  # workbook); the marker is copied to `Zeit` and carried downwards.
  mutate(
    Zeit = case_when(
      Durchschnitt == "Insgesamt" ~ "Insgesamt",
      Durchschnitt == "Vollzeit" ~ "Vollzeit",
      Durchschnitt == "Teilzeit" ~ "Teilzeit",
      TRUE ~ NA_character_
    )
  ) |>
  fill(Zeit) |>
  # Drop every row that still has a missing value in any column.
  drop_na() |>
  select(-Anzahl) |>
  # Full-time employees only, without the overall ("Insgesamt") row.
  filter(Zeit == "Vollzeit", Bildung != "Insgesamt") |>
  mutate(across(c(Durchschnitt, P25, Median, P75), as.numeric)) |>
  pivot_longer(Durchschnitt:P75, names_to = "Position", values_to = "Lohn")
# Bin every wage into the same 5 000-wide classes as the sector data, shorten
# the education labels, and derive the plotting positions for the education
# side of the chart.
edu <- edu |>
  mutate(
    # cut() uses right-closed intervals by default, i.e. (20000, 25000], ...;
    # values outside (20000, 95000] become NA.
    wageclass = cut(
      Lohn,
      breaks = seq(20000, 95000, 5000),
      labels = paste0(
        "€ ", comma(seq(20000, 90000, 5000), big.mark = " "),
        " - ", comma(seq(25000, 95000, 5000), big.mark = " ")
      ),
      ordered_result = TRUE
    )
  ) |>
  mutate(
    # recode_factor() orders the factor levels as listed here, so this list
    # also fixes the order of the education labels in the plot.
    Bildung = recode_factor(
      Bildung,
      "Höchstens Pflichtschulabschluss" = "Pflichtschulabschluss",
      "Lehrabschluss" = "Lehrabschluss",
      "Berufsbildende mittlere Schule, Meisterprüfung" = "BMS, Meisterprüfung",
      "Allgemeinbildende höhere Schule" = "AHS",
      "Berufsbildende höhere Schule" = "BHS",
      "Kolleg, Akademie, hochschulverwandte Lehranstalt" = "Kolleg, Akademie",
      "Universität, Fachhochschule" = "Universität, FH"
    )
  ) |>
  mutate(
    # Position of the education label: its factor level number.
    index_edu = as.numeric(Bildung),
    # Position of the wage class: its rank among the ordered classes.
    index = as.numeric(wageclass),
    # One key per education/wage-class pair, used to group the curves.
    group_edu = glue("{Bildung}-{index}")
  )
# Gender comparison of mean wages per NACE section, one row per section with
# the columns `Frauen` and `Männer`.
wagegap <- wages |>
  select(NACE, Geschlecht, Position, Lohn) |>
  filter(Position == "Durchschnitt", Geschlecht != "Insgesamt") |>
  pivot_wider(names_from = Geschlecht, values_from = Lohn) |>
  mutate(
    # NOTE: despite its name, `gpg` is 1 minus the relative pay gap (women's
    # mean wage as a share of men's). The plot draws a bar of length `gpg`
    # and prints `1 - gpg` as the gap percentage, so the name is kept as is.
    gpg = 1 - (Männer - Frauen) / Männer,
    # Same vertical position as the sector labels (reversed NACE order).
    index_ind = as.numeric(fct_rev(NACE))
  )
# Plot ----
# Layout (x axis): pay-gap bars | sector labels | wage classes | education
# labels. Sigmoid curves connect each sector / education level to the wage
# class its P25, median, P75 (grey) and mean (coloured) fall into.
explanation <- "Die grauen Linien zeigen das 25., 50. und 75. Perzentil der Bruttolöhne von Vollzeitbeschäftigten. Die gefärbten Linien zeigen das durchschnittliche Jahresgehalt."

# Subsets that are used by more than one layer.
wages_total <- filter(wages, Geschlecht == "Insgesamt")
wages_mean <- filter(wages_total, Position == "Durchschnitt")
edu_mean <- filter(edu, Position == "Durchschnitt")

wage_plot <- ggplot(wages_total) +
  # Sector side: guide line, sector label, wage-class label.
  geom_segment(
    aes(x = -0.5, xend = -4.4, y = index_ind, yend = index_ind),
    color = "grey90", linetype = "dashed", linewidth = 0.2, alpha = 0.2
  ) +
  geom_label(
    aes(x = -0.5, y = index_ind, label = Branche),
    hjust = 1, color = "grey90", label.size = 0, fill = "grey30",
    size = 3, family = "raleway"
  ) +
  geom_text(
    aes(x = 2.35, y = index + 0.5, label = wageclass),
    color = "grey90", size = 3, family = "raleway", hjust = 0.5
  ) +
  # Sector -> wage class: all statistics in grey, the mean in colour on top.
  geom_sigmoid(
    aes(
      x = -0.4, xend = 1.7, y = index_ind, yend = index + 0.5,
      group = factor(group)
    ),
    color = "grey50", linewidth = 0.2
  ) +
  geom_sigmoid(
    data = wages_mean,
    aes(
      x = -0.4, xend = 1.7, y = index_ind, yend = index + 0.5,
      group = factor(group), color = Lohn
    ),
    linewidth = 0.5
  ) +
  geom_point(
    data = wages_mean,
    aes(x = 1.7, y = index + 0.5, color = Lohn),
    size = 1.5, inherit.aes = FALSE
  ) +
  # Education side: labels are shifted up by 4 units.
  geom_label(
    data = edu,
    aes(x = 4.5, y = index_edu + 4, label = Bildung),
    hjust = 0, color = "grey90", label.size = 0, fill = "grey30",
    size = 3, family = "raleway"
  ) +
  # Wage class -> education: all statistics in grey, the mean in colour.
  geom_sigmoid(
    data = edu,
    aes(
      x = 3, xend = 4.5, y = index + 0.5, yend = index_edu + 4,
      group = factor(group_edu)
    ),
    color = "grey50", linewidth = 0.2
  ) +
  geom_sigmoid(
    data = edu_mean,
    aes(
      x = 3, xend = 4.5, y = index + 0.5, yend = index_edu + 4,
      group = factor(group_edu), color = Lohn
    ),
    linewidth = 0.5
  ) +
  geom_point(
    data = edu_mean,
    aes(x = 3, y = index + 0.5, color = Lohn),
    size = 1.5, inherit.aes = FALSE
  ) +
  # Pay-gap bars: a grey bar of length 1 with a golden bar of length `gpg`
  # drawn over it; the printed percentage is `1 - gpg`.
  geom_segment(
    data = wagegap,
    aes(x = -4.5, xend = -5.5, y = index_ind, yend = index_ind),
    linewidth = 3, color = "gray80"
  ) +
  geom_segment(
    data = wagegap,
    aes(x = -4.5, xend = -4.5 - gpg, y = index_ind, yend = index_ind),
    linewidth = 3, color = "darkgoldenrod"
  ) +
  geom_text(
    data = wagegap,
    aes(x = -5.6, y = index_ind, label = percent(1 - gpg, accuracy = 1)),
    size = 2.5, color = "gray90", hjust = 1
  ) +
  annotate(
    "text", x = -5.5, y = 15.5, label = "Gender Pay Gap",
    color = "grey90", size = 2.5, family = "raleway", hjust = 0
  ) +
  annotate(
    "text", x = 2.35, y = 16, label = str_wrap(explanation, 90),
    color = "grey90", hjust = 0.5, size = 2.5, family = "roboto"
  ) +
  scale_x_continuous(limits = c(-6, 6)) +
  scale_y_continuous(limits = c(0.5, 16)) +
  scale_color_gradient(low = "darkred", high = "yellow") +
  theme_void() +
  labs(
    title = "Branchen, Bildungsabschlüsse und Löhne in Österreich",
    subtitle = "Auswertungen der Verdienststrukturerhebung 2018 für Vollzeitbeschäftigte",
    caption = "Daten: Statistik Austria. Idee: @cnicault. Grafik: @matschnetzer"
  ) +
  theme(
    plot.background = element_rect(fill = "grey30"),
    legend.position = "none",
    plot.title = element_text(
      size = 18, color = "gray90", family = "pattaya",
      margin = margin(t = 20), hjust = 0.1
    ),
    plot.subtitle = element_text(
      size = 11, color = "gray90", family = "roboto",
      margin = margin(t = 5), hjust = 0.1
    ),
    plot.caption = element_text(
      size = 8, color = "gray90", family = "roboto",
      margin = margin(b = 5), hjust = 0.98
    )
  )

# Auto-print at top level, as the unassigned plot expression did before.
wage_plot

# Pass the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("wages.png", plot = wage_plot, dpi = 320, width = 12, height = 6)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment