Skip to content

Instantly share code, notes, and snippets.

@mschnetzer
Created July 10, 2023 07:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mschnetzer/4f7f15d6f76542c75864871ee78394c1 to your computer and use it in GitHub Desktop.
Save mschnetzer/4f7f15d6f76542c75864871ee78394c1 to your computer and use it in GitHub Desktop.
Volksschüler:innen in Ganztagsschulen (https://twitter.com/matschnetzer/status/1678307172596195328?s=20)
librarian::shelf(tidyverse, pdftools, geojsonsf, sf, ggtext, patchwork)
# Download the PDF and keep only the extracted text of page 3
rawpdf <- pdf_text(
  "https://www.parlament.gv.at/dokument/XXVII/AB/14482/imfname_1572157.pdf"
)[3]
# Break the page text into single lines, strip leading whitespace, and keep
# the three hard-coded line ranges that are treated as the data area
lines <- str_trim(unlist(strsplit(rawpdf, "\n")), side = "left")
short <- lines[c(11:24, 30:36, 42:63)]
# Build a data frame from the selected lines and reshape it so that every row
# is one region / school-type pair with the three counts and two shares
df <- as.data.frame(short) |>
  # Split each line into columns A..Z: break on whitespace that is followed by
  # a digit, or on whitespace followed by the literal "na"
  separate(1, LETTERS, sep = "\\s+(?=[0-9])|\\s+na") |>
  # Remove the first "." in every cell before converting to numbers
  # NOTE(review): presumably a thousands separator -- confirm against the PDF
  map_df(\(column) str_remove(column, pattern = "\\.")) |>
  mutate(across(-A, as.numeric)) |>
  # Keep the label column (renamed) plus every column that is not entirely NA
  select(region = A, where(\(x) !all(is.na(x)))) |>
  drop_na() |>
  filter(str_detect(region, "Gesamt", negate = TRUE)) |>
  # Strip everything up to and including the first run of 2+ whitespace
  # characters from the label
  mutate(region = str_remove(region, pattern = ".*?\\s+\\s+")) |>
  pivot_longer(-region, names_to = "original", values_to = "number") |>
  # Label the long rows purely by position; the repeat counts are hard-coded
  # (4 * 93 and 12 * 31 both give 372 rows)
  mutate(
    type = rep(c("VS", "MS", "SO", "AHS-U"), times = 93),
    day = rep(rep(c("Gesamt", "Halbtag", "Ganztag"), each = 4), times = 31)
  ) |>
  pivot_wider(
    id_cols = c(region, type),
    names_from = day,
    values_from = number
  ) |>
  # Shares of the "Ganztag" and "Halbtag" counts in percent of "Gesamt"
  mutate(
    gtshare = Ganztag / Gesamt * 100,
    htshare = Halbtag / Gesamt * 100
  )
# Load the district-level map of Austria as an sf object
bezmap <- geojson_sf(
  "https://raw.githubusercontent.com/ginseng666/GeoJSON-TopoJSON-Austria/master/2021/simplified-99.9/bezirke_999_geo.json"
)
# Assign every political district (by its iso code) to an education region;
# districts matching none of the rules get NA and are dropped at the end
bezmap <- bezmap |>
  mutate(
    region = case_when(
      # Vorarlberg
      iso %in% c(802, 803) ~ "Vorarlberg‐Region Nord",
      iso %in% c(804, 801) ~ "Vorarlberg‐Region Süd",
      # Tirol
      iso %in% c(702, 706, 708) ~ "Tirol‐Region West",
      iso %in% c(701, 703, 709) ~ "Tirol‐Region Mitte",
      iso %in% c(704, 705, 707) ~ "Tirol‐Region Ost",
      # Salzburg
      iso %in% c(501, 503) ~ "Salzburg‐Region Nord",
      iso %in% c(502, 504:506) ~ "Salzburg‐Region Süd",
      # Wien
      iso %in% c(902, 903, 910, 911, 920, 921, 922) ~ "Wien‐Region Ost",
      iso %in% c(901, 904:909, 912:919, 923) ~ "Wien‐Region West",
      # STMK
      iso %in% 612 ~ "STMK‐Region Liezen",
      iso %in% c(611, 621) ~ "STMK‐Region Obersteiermark Ost",
      iso %in% c(614, 620) ~ "STMK‐Region Obersteiermark West",
      iso %in% c(617, 622) ~ "STMK‐Region Oststeiermark",
      iso %in% c(601, 606, 616) ~ "STMK‐Region Steirischer Zentralraum",
      iso %in% 623 ~ "STMK‐Region Südoststeiermark",
      iso %in% c(603, 610) ~ "STMK‐Region Südweststeiermark",
      # OÖ
      iso %in% c(407, 417) ~ "OÖ‐Region Gmunden‐Vöcklabruck",
      iso %in% c(404, 412, 414) ~ "OÖ‐Region Innviertel",
      iso %in% c(401, 410) ~ "OÖ‐Region Linz/L",
      iso %in% c(406, 411, 413, 416) ~ "OÖ‐Region Mühlviertel",
      iso %in% c(402, 409, 415) ~ "OÖ‐Region Steyr‐Kirchdorf",
      iso %in% c(403, 405, 408, 418) ~ "OÖ‐Region Wels‐Grieskirchen‐Eferding",
      # NÖ
      iso %in% c(306, 307, 317) ~ "NÖ‐Region Baden",
      iso %in% c(308, 310, 312, 316) ~ "NÖ‐Region Mistelbach",
      iso %in% c(302, 314, 319, 321) ~ "NÖ‐Region Tulln",
      iso %in% c(303, 305, 315, 320) ~ "NÖ‐Region Waidhofen an der Ybbs",
      iso %in% c(304, 318, 323) ~ "NÖ‐Region Wiener Neustadt",
      iso %in% c(301, 309, 311, 313, 322, 325) ~ "NÖ‐Region Zwettl",
      # Kärnten
      iso %in% c(201, 204, 205, 208, 209) ~ "Kärnten‐Region Ost",
      iso %in% c(202, 203, 206, 207, 210) ~ "Kärnten‐Region West",
      # Burgenland
      iso %in% 101:109 ~ "Burgenland‐Region"
    ),
    # NOTE(review): pattern and replacement look like the same hyphen
    # character here, which would make this a no-op -- confirm whether a
    # different Unicode hyphen was intended on one side
    region = str_replace_all(region, "-", "-")
  ) |>
  drop_na()
# Evaluate primary schools (type "VS") only: attach their full-day share to
# the district map, merge districts into one geometry per education region,
# and bin the share into 10-percentage-point classes
vsmap <- bezmap |>
  left_join(
    df |> filter(type == "VS") |> select(region, gtshare),
    # Name the key explicitly instead of relying on the natural join over
    # whatever columns happen to share a name (this also silences the
    # "Joining with `by = ...`" message)
    by = "region"
  ) |>
  summarise(
    geometry = st_union(geometry),
    gtshare = mean(gtshare),
    .by = region
  ) |>
  mutate(
    # cut() uses right-closed intervals by default, matching the
    # ">lower bis upper%" wording of the labels
    anteil = cut(
      gtshare,
      breaks = seq(0, 100, 10),
      labels = glue::glue(">{seq(0,90,10)} bis {seq(10,100,10)}%")
    )
  )
# Build the federal-state outlines: group the districts by the first digit of
# their iso code and merge each group into a single geometry
blmap <- bezmap |>
  mutate(bl = str_extract(iso, "^\\d")) |>
  summarise(geometry = st_union(geometry), .by = bl)
# Map of Austria: education regions filled by share class, with the
# federal-state borders drawn on top and the headline placed as rich text
p1 <- ggplot(vsmap) +
  geom_sf(
    aes(fill = anteil, group = region),
    linewidth = 0.1,
    color = "gray90"
  ) +
  geom_sf(
    data = blmap,
    fill = "transparent",
    linewidth = 0.2,
    color = "black"
  ) +
  # Title, subtitle and source line in one HTML-formatted label; it is anchored
  # by its top-left corner (hjust = 0, vjust = 1) and drawn without box or border
  annotate(
    "richtext",
    x = 9.52,
    y = 49.1,
    fill = NA,
    label.colour = NA,
    label = "<span style='font-size:26px;font-family:\"Playfair Display\";'>Ganztägig betreut</span><br><br>Anteil der Volksschüler:innen<br>in <span style='color:#bf4f51;'>Ganztagesschulen</span> nach<br>Bildungsregionen, 2022/23<br><br><span style='font-size:10px;'>Quelle: Parlament · Grafik: @matschnetzer</span>",
    size = 4,
    hjust = 0,
    vjust = 1,
    family = "Roboto Condensed",
    lineheight = 1.1
  ) +
  # Entries 3 to 9 of the reversed "Tam" palette; legend keys in a single row
  # with their labels underneath
  scale_fill_manual(
    values = MetBrewer::met.brewer("Tam", direction = -1)[3:9],
    guide = guide_legend(
      nrow = 1,
      label.position = "bottom",
      keywidth = 4,
      keyheight = 0.6
    )
  ) +
  coord_sf(ylim = c(46.3, 49)) +
  theme_minimal(base_family = "Roboto Condensed", base_size = 10) +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.title = element_blank(),
    legend.position = c(0.5, 0)
  )
# The five education regions with the highest full-day share, drawn as a
# point with a segment back to zero and a text label above each bar
p2 <- vsmap |>
  slice_max(gtshare, n = 5) |>
  mutate(region = fct_reorder(region, gtshare)) |>
  ggplot(aes(x = region, y = gtshare, color = anteil)) +
  geom_point(size = 2) +
  geom_segment(aes(xend = region, yend = 0), linewidth = 1.5) +
  # Label = region name without the "‐Region" part plus the rounded share,
  # left-aligned at y = 0 and nudged off the segment
  geom_text(
    aes(label = glue::glue("{str_remove(region, pattern = '‐Region')}: {round(gtshare,0)}%")),
    y = 0,
    hjust = 0,
    size = 2.3,
    nudge_x = 0.35,
    color = "black",
    family = "Roboto Condensed"
  ) +
  scale_y_continuous(
    labels = scales::percent_format(scale = 1, suffix = "%"),
    limits = c(0, 60)
  ) +
  # Entries 6 to 9 of the reversed "Tam" palette
  scale_color_manual(
    values = MetBrewer::met.brewer("Tam", direction = -1)[6:9]
  ) +
  labs(title = "5 höchste und niedrigste Quoten") +
  coord_flip() +
  theme_minimal(base_family = "Roboto Condensed", base_size = 9) +
  theme(
    plot.title = element_text(size = 9),
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(linewidth = 0.2),
    panel.grid.major.y = element_blank()
  )
# The five education regions with the lowest full-day share, drawn as a
# point with a segment back to zero and a text label above each bar
p3 <- vsmap |>
  slice_min(gtshare, n = 5) |>
  mutate(region = fct_reorder(region, gtshare)) |>
  ggplot(aes(x = region, y = gtshare, color = anteil)) +
  geom_point(size = 2) +
  geom_segment(aes(xend = region, yend = 0), linewidth = 1.5) +
  # Label = region name without the "‐Region" part plus the rounded share,
  # left-aligned at y = 0 and nudged off the segment
  geom_text(
    aes(label = glue::glue("{str_remove(region, pattern = '‐Region')}: {round(gtshare,0)}%")),
    y = 0,
    hjust = 0,
    size = 2.3,
    nudge_x = 0.35,
    color = "black",
    family = "Roboto Condensed"
  ) +
  scale_y_continuous(
    labels = scales::percent_format(scale = 1, suffix = "%"),
    limits = c(0, 60)
  ) +
  # Entries 3 and 4 of the reversed "Tam" palette
  scale_color_manual(
    values = MetBrewer::met.brewer("Tam", direction = -1)[3:4]
  ) +
  coord_flip() +
  theme_minimal(base_family = "Roboto Condensed", base_size = 9) +
  # Only axis.text.y is blanked here, so the remaining axis keeps its labels
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.title = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(linewidth = 0.2),
    panel.grid.major.y = element_blank()
  )
# Combine the plots with patchwork: p1 beside the stack of p2 over p3, with
# relative column widths of two thirds and one third
p1 + (p2 / p3) + plot_layout(widths = c(2 / 3, 1 / 3))
# Write the PNG; no plot object is passed, so ggsave() uses its default plot
ggsave(
  "gtschule.png",
  width = 10,
  height = 4,
  dpi = 320,
  bg = "white"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment