Skip to content

Instantly share code, notes, and snippets.

@cimentadaj
Last active March 13, 2020 13:20
Show Gist options
  • Save cimentadaj/f73e1c51492165d5576e3fd1034acb8b to your computer and use it in GitHub Desktop.
Save cimentadaj/f73e1c51492165d5576e3fd1034acb8b to your computer and use it in GitHub Desktop.
library(eurostat)
library(dplyr)
## Download the Eurostat table with id "demo_r_pjangroup" into `res`
res <- get_eurostat(id = "demo_r_pjangroup")
## Regular expressions selecting, per country, the geo codes to keep.
## Most patterns end in `$` so that longer codes sharing the same prefix
## are not matched.

## UK, NUTS1: e.g. UKB
uk_match <- "UK[A-Z]{1}$"
## Italy: one letter followed by one or two digits, e.g. ITC1
## NOTE(review): unlike the other patterns this one has no `$` anchor, so it
## also matches longer codes that start this way -- confirm this is intended
ita_match <- "IT[A-Z][0-9]{1,2}"
## Germany, NUTS1: either DE3 or DEA
## (German NUTS1 codes mix digits and letters, hence the two alternatives)
de_match <- "DE[A-Z]{1}$|DE[0-9]{1}$"
## France, NUTS1: FR1 and FRB, FRC, etc.
fr_match <- "FR1$|FR[A-Z]{1}$"
## Spain, NUTS2: e.g. ES11
es_match <- "ES[0-9]{2,2}$"
## Ireland, NUTS2: IE04 to IE06
## The alternatives are grouped so that `$` anchors all three codes; without
## the group only IE06 is anchored and IE04/IE05 also match longer codes
## such as IE041.
ie_match <- "(IE04|IE05|IE06)$"
## Netherlands, NUTS2: e.g. NL12
nl_match <- "NL[0-9]{2,2}$"
## Belgium, NUTS2: e.g. BE12
be_match <- "BE[0-9]{2,2}$"
## Austria, NUTS2: e.g. AT11
at_match <- "AT[0-9]{2,2}$"
## Luxembourg, NUTS2: e.g. LU00
lu_match <- "LU[0-9]{2,2}$"
## Reference for the NUTS codes of each country (Wikipedia):
## https://en.wikipedia.org/wiki/NUTS_statistical_regions_of_France
## Swap the country name at the end of the URL to look up another country.
##
## Single alternation built from all per-country patterns: a code matches
## `final_match` when it matches at least one of them.
final_match <- paste(
  c(
    uk_match, ita_match, de_match, fr_match, es_match,
    ie_match, nl_match, be_match, at_match, lu_match
  ),
  collapse = "|"
)
## Build `tst`: for every sex / age / geo combination of the selected
## regions, keep the most recent observation, attach labels and a country
## tag, and tidy up the region names.
tst <-
  res %>%
  ## To quickly check the codes for each country -- delete after done
  ## filter(grepl("FR", geo)) %>%
  ## distinct(geo) %>%
  ## print(n = Inf)
  ## Keep only the rows of interest: drop sex code "T", age codes "TOTAL" and
  ## "UNK", and every geo code not matched by `final_match`. These conditions
  ## only involve the grouping columns used below, so filtering first keeps
  ## exactly the same rows while the grouped step handles far less data.
  filter(sex != "T", !age %in% c("TOTAL", "UNK"), grepl(final_match, geo)) %>%
  ## Select the most recent time point
  group_by(sex, age, geo) %>%
  filter(time == max(time)) %>%
  ungroup() %>%
  ## Drop unused columns
  select(-unit, -time) %>%
  ## Replace codes by labels; the geo code is kept and used below as
  ## `geo_code`
  label_eurostat(code = "geo", fix_duplicated = TRUE) %>%
  ## Tag each row with its country; the first matching pattern wins
  mutate(
    country = case_when(
      grepl(uk_match, geo_code) ~ "UK",
      grepl(ita_match, geo_code) ~ "ITA",
      grepl(de_match, geo_code) ~ "DE",
      grepl(fr_match, geo_code) ~ "FR",
      grepl(es_match, geo_code) ~ "ES",
      grepl(ie_match, geo_code) ~ "IE",
      grepl(nl_match, geo_code) ~ "NL",
      grepl(be_match, geo_code) ~ "BE",
      grepl(at_match, geo_code) ~ "AT",
      grepl(lu_match, geo_code) ~ "LU",
      TRUE ~ NA_character_
    )
  ) %>%
  select(country, sex, age, geo_code, geo, values) %>%
  ## Because some geo labels (e.g. in Germany) have the code such as DE* in
  ## front, strip those code prefixes as well as the "Prov. " prefix. The dot
  ## is escaped so it only matches a literal "." instead of any character.
  mutate(
    geo = gsub(
      paste0(
        "DE[0-9]{1} |DE[A-Z]{1} |FR[A-Z]{1,2} |FR1 |ES[0-9]{2,2} |",
        "Prov\\. |BE[0-9]{2,3} "
      ),
      "",
      geo
    )
  ) %>%
  ## Remove rows whose label contains one of these strings
  filter(!grepl("Départements|Not regionalised|RUP FR - Régions", geo))
## Debug helper -- delete after done.
## Prints every distinct code/name pair of one country (here "LU") and then
## writes the region names alone, one per line.
tst %>%
  filter(country == "LU") %>%
  distinct(geo_code, geo) %>%
  print(n = Inf) %>%
  {
    cat(as.character(.[["geo"]]), sep = "\n")
  }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment