Skip to content

Instantly share code, notes, and snippets.

@mhermans
Created January 14, 2022 13:13
Show Gist options
  • Save mhermans/eaa2019d559ffaba28e68314a874cab2 to your computer and use it in GitHub Desktop.
Save mhermans/eaa2019d559ffaba28e68314a874cab2 to your computer and use it in GitHub Desktop.
Visualize a choropleth map of MSD prevalence in EU, using ggplot and the Eurostat API for data and administrative boundaries.
diagnose sex age unit geo time values iso3_code
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Slovakia 2020-01-01 73.2 SVK
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Czechia 2020-01-01 72.2 CZE
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Cyprus 2020-01-01 71.9 CYP
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Poland 2020-01-01 68.8 POL
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Croatia 2020-01-01 67.3 HRV
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Austria 2020-01-01 66.6 AUT
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Latvia 2020-01-01 66.4 LVA
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Greece 2020-01-01 66 GRC
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Norway 2020-01-01 64.5 NOR
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Italy 2020-01-01 64.4 ITA
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Lithuania 2020-01-01 62.1 LTU
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Finland 2020-01-01 61.9 FIN
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Slovenia 2020-01-01 60.9 SVN
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Hungary 2020-01-01 60.4 HUN
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Spain 2020-01-01 60.3 ESP
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Portugal 2020-01-01 59.5 PRT
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Estonia 2020-01-01 58.5 EST
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Romania 2020-01-01 57.8 ROU
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Switzerland 2020-01-01 56.5 CHE
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Germany 2020-01-01 56 DEU
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Belgium 2020-01-01 53.9 BEL
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem France 2020-01-01 53.5 FRA
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Bulgaria 2020-01-01 47.7 BGR
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Denmark 2020-01-01 47.4 DNK
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Ireland 2020-01-01 44.9 IRL
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Netherlands 2020-01-01 42.7 NLD
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Luxembourg 2020-01-01 39.9 LUX
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Sweden 2020-01-01 38.5 SWE
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Iceland 2020-01-01 34 ISL
Musculo-skeletal disorders Total From 15 to 64 years Percentage of persons reporting a health problem Malta 2020-01-01 33.5 MLT
# ######################################################################### #
# Visualize a choropleth map of musculo-skeletal disorder prevalence in EU, #
# using geom_sf() and Eurostat API for data and administrative boundaries. #
# ######################################################################### #
library(eurostat)
library(lubridate)
library(dplyr)
library(tidyr)
library(countrycode)
library(giscoR)
library(sf)
library(janitor)
library(ggplot2)
library(hrbrthemes)
library(readr)
library(assertr)
library(here)
# Search the Eurostat catalogue for datasets on work-related health problems.
search_eurostat("work-related health")
# ...
# 6 Persons reporting a work-related health problem resulting in limitations of daily activities hsw_pb4 datas… 12.10.2021 ...
# 7 Persons reporting a work-related health problem by sex, age and type of problem hsw_pb5 datas… 12.10.2021 ...
# ...
# Download hsw_pb5 (persons reporting a work-related health problem by sex,
# age and type of problem) and standardise the column names.
hsw_pb5 <- clean_names(
  get_eurostat(
    id = "hsw_pb5",
    type = "label", # informative labels instead of the (shorter) codes
    time_format = "date"
  )
)
# Subset the data: keep only the 2020 musculo-skeletal disorder share for the
# total 15-64 population, and drop the EU / euro-area aggregate rows.
hsw_pb5_subset <- hsw_pb5 %>%
  filter(
    sex == "Total",
    age == "From 15 to 64 years",
    diagnose == "Musculo-skeletal disorders",
    time == "2020-01-01",
    unit == "Percentage of persons reporting a health problem",
    !geo %in% c(
      "European Union - 27 countries (from 2020)",
      "Euro area - 19 countries (from 2015)",
      "European Union - 28 countries (2013-2020)"
    )
  ) %>%
  mutate(
    # replace Eurostat's long label for Germany with the plain country name
    geo = if_else(
      geo == "Germany (until 1990 former territory of the FRG)",
      "Germany",
      geo
    ),
    # when using labels you don't get the ISO country code, so derive it from
    # the (already shortened) country name for merging with spatial boundaries
    iso3_code = countrycode(
      geo,
      origin = "country.name",
      destination = "iso3c"
    )
  ) %>%
  arrange(desc(values))
# Export the subset so the data can be reproduced later without the API.
# verify() stops the pipeline unless the table is exactly 30 rows by 8 columns.
hsw_pb5_subset %>%
  verify(dim(.) == c(30, 8)) %>%
  write_csv(here::here("20220113_eurostat_hsw_pb5_subset.csv"))
# Collect the ISO 3-letter country codes of the entries that the countrycode
# package assigns to the continent "Europe", minus two exclusions.
eu_mainland_countries_iso3c <- countrycode::codelist %>%
  select(country.name.en, iso3c, region, continent) %>%
  filter(
    continent == "Europe",
    iso3c != "ALA", # Åland Islands not recognised by gisco_get_countries()
    iso3c != "RUS"  # drop Russia
  ) %>%
  pull(iso3c)
# Query the Eurostat GISCO service for an sf data frame holding the
# administrative boundaries of the countries selected above, then
# standardise the column names.
eu_mainland <- clean_names(
  gisco_get_countries(
    year = "2020",
    epsg = "4326",
    resolution = "20",
    spatialtype = "RG", # request region (multi)polygons
    country = eu_mainland_countries_iso3c,
    cache_dir = tempdir()
  )
)
# Crop the country (multi)polygons to a bounding box around continental Europe
# ("mainland"), which drops remote islands and overseas territories.
# The box is expressed in the layer's own coordinates: the boundaries were
# requested with epsg = 4326, so the values are degrees longitude / latitude.
# NOTE: the input must be `eu_mainland` (the object returned by
# gisco_get_countries() above); no other boundaries object exists in this script.
eu_mainland <- st_crop(
  eu_mainland,
  c(xmin = -10, ymin = 36, xmax = 45, ymax = 71)
)
# Attach the Eurostat values to the boundaries via the ISO country code.
# A left join keeps every country polygon, also those without Eurostat data.
eu_mainland <- left_join(eu_mainland, hsw_pb5_subset, by = "iso3_code")
# Draw the choropleth with ggplot2::geom_sf(): one polygon per country,
# filled by the reported percentage.
p_eu_msa_prevalence <- ggplot(data = eu_mainland) +
  geom_sf(
    mapping = aes(fill = values),
    color = "white", # thin, white country lines
    lwd = 0.4
  ) +
  scale_fill_distiller(
    type = "seq",
    palette = "Reds",
    direction = 1,
    na.value = "grey80", # fill for polygons without a value
    labels = scales::percent_format(scale = 1, accuracy = 1)
  ) +
  coord_sf(
    # limit map to 'mainland' EU; limits are in the units of the target CRS
    crs = 3035,
    xlim = c(2500000, 6500000),
    ylim = c(1500000, 5300000),
    label_axes = ""
  ) +
  labs(
    title = "Musculo-skeletal disorders are by far the leading\nhealth problem for working Europeans",
    subtitle = "The percentage of working persons with a health problem,\nthat report a musculo-skeletal disorder (2020).",
    caption = "Source: Eurostat/LFS (hsw_pb5, 2020), 15-64 year olds that are currently working or were working in\nthe past • Maarten Hermans (@hermansm) • © EuroGeographics for the administrative borders.",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_ipsum_rc() +
  # must come after the complete theme so the legend position is not reset
  theme(legend.position = c(.89, .85))
# Write the map to a 7 x 7 PNG in the project root.
ggsave(
  filename = here::here("map_eu_msd_prevalence_en.png"),
  plot = p_eu_msa_prevalence,
  width = 7,
  height = 7
)
@mhermans
Copy link
Author

map_eu_msd_prevalence_en

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment