Created
February 21, 2022 19:02
-
-
Save dickoa/8da3b2c666e9a0cd5aa1e606f9ce8b1e to your computer and use it in GitHub Desktop.
Get the list of RIDL Datasets for West and Central Africa
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse)
library(janitor)
library(ridl) ## remotes::install_gitlab("dickoa/ridl")
library(popdata) ## remotes::install_gitlab("dickoa/popdata")

### Build the regular expressions used to recognise the containers of the
### West and Central Africa bureau: country name, iso3 and unhcr code

## Turn a vector of labels into anchored patterns of the form "^label<suffix>".
## Labels are passed through make_clean_names() and every "_" is then replaced
## by "-" before the "^" anchor and the optional suffix are added.
wca_url_patterns <- function(x, suffix = "") {
  x <- make_clean_names(x)
  x <- str_replace_all(x, "\\_", "\\-")
  paste0("^", x, suffix)
}

## Filter the country table once and reuse it for the three sets of patterns
wca <- filter(pd_countries, bureau == "West and Central Africa")

## With no matching country the patterns collapse to a bare "^", which matches
## every container name, so stop here rather than silently selecting them all
stopifnot(
  "no country found for bureau 'West and Central Africa'" = nrow(wca) > 0
)

ct <- wca_url_patterns(pull(wca, nameShort))
ct <- c(ct, "^registration$") # registration is the url of "Cameroon: registration"

iso <- wca_url_patterns(pull(wca, iso), suffix = "-")
iso <- c(iso, "^car$") # car is the url for Central African Rep.

## adding unhcr code just in case
code <- wca_url_patterns(pull(wca, code), suffix = "-")
### Container names returned by ridl_container_list()
cl_nm <- ridl_container_list()
cl_nm

## Filter the container name to select the one matching name, iso3 and unhcr code
## (all the patterns are combined into a single alternation)
wca_pattern <- paste0(unique(c(ct, iso, code)), collapse = "|")
cl_nm <- grep(wca_pattern, cl_nm, value = TRUE)
cl_nm

## Fetch each selected container, then remove container without datasets
cl <- map(cl_nm, ridl_container_show)
cl <- keep(cl, \(container) container$data$package_count > 0)
cl
## Checking container (and order them like in the RIDL platform)
ord <- map_int(cl, \(x) x$data$package_count)

## Compute the decreasing order once and reuse it for every column so the
## titles, names and counts are guaranteed to stay aligned
by_count <- order(ord, decreasing = TRUE)
cl_sorted <- cl[by_count]

## One row per container; the title is split on the first ": " into country
## and sector. extra = "merge" keeps any further ": " inside the sector instead
## of dropping it, fill = "right" leaves the sector NA when there is no ": "
container_df <- tibble(
  container_title = map_chr(cl_sorted, \(x) x$data$title),
  container_name = map_chr(cl_sorted, \(x) x$data$name),
  n_ds = ord[by_count]
) |>
  separate(col = "container_title",
           into = c("country", "sector"),
           sep = ": ",
           extra = "merge",
           fill = "right")
### Extract dataset from containers
## One row per dataset with its title, its name and the name of the container
## it was read from. A dataset without title or name gets NA in that column.
## NOTE(review): assumes container$data$packages lists every dataset of the
## container; compare the row count with package_count to confirm
ridl_data <- map_dfr(cl, \(container) {
  pkgs <- container$data$packages
  tibble(dataset_title = map_chr(pkgs, "title", .default = NA_character_),
         dataset_name = map_chr(pkgs, "name", .default = NA_character_),
         container_name = container$data$name)
})
### Add info from container
## Attach the country and sector of each container to its datasets
ridl_data <- left_join(ridl_data,
                       select(container_df, country, sector, container_name),
                       by = "container_name")
glimpse(ridl_data)

### Export to Excel
## Create the output folder first so the export does not fail on a missing
## directory after all the API calls have already been made
out_file <- "./data/wca_ridl_data_21_02_2022.xlsx"
dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)
writexl::write_xlsx(ridl_data, out_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment