Skip to content

Instantly share code, notes, and snippets.

@dickoa
Created February 21, 2022 19:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dickoa/8da3b2c666e9a0cd5aa1e606f9ce8b1e to your computer and use it in GitHub Desktop.
Save dickoa/8da3b2c666e9a0cd5aa1e606f9ce8b1e to your computer and use it in GitHub Desktop.
Get the list of RIDL Datasets for West and Central Africa
library(tidyverse)
library(janitor)
library(ridl) ## remotes::install_gitlab("dickoa/ridl")
library(popdata) ## remotes::install_gitlab("dickoa/popdata")
### Build the regular expressions used to recognise the West and Central
### Africa containers from their url, based on the countries name, iso3 and
### unhcr code found in `pd_countries`

## Turn a vector of labels into url prefixes: the labels are normalised by
## `make_clean_names()`, underscores are replaced by hyphens, the result is
## anchored at the start of the url with "^" and `suffix` is appended to it.
url_prefix <- function(x, suffix = "") {
  x <- make_clean_names(x)
  x <- str_replace_all(x, "\\_", "\\-")
  paste0("^", x, suffix)
}

## Select the countries of the bureau once; the three sets of patterns below
## are all derived from these rows
wca <- filter(pd_countries, bureau == "West and Central Africa")

## An empty selection would produce the bare pattern "^", which matches every
## container name, so stop early instead of silently selecting everything
if (nrow(wca) == 0) {
  stop("No country found in `pd_countries` for bureau 'West and Central Africa'",
       call. = FALSE)
}

## Country short names, matched as a plain prefix of the url
ct <- url_prefix(pull(wca, nameShort))
ct <- c(ct, "^registration$") # registration is the url of "Cameroon: registration"

## iso3 codes, which must be followed by a hyphen in the url
iso <- url_prefix(pull(wca, iso), suffix = "-")
iso <- c(iso, "^car$") # car is the url for Central African Rep.

## adding unhcr code just in case (also followed by a hyphen)
code <- url_prefix(pull(wca, code), suffix = "-")
### Names of all the containers returned by RIDL
cl_nm <- ridl_container_list()
cl_nm
## Collapse the name, iso3 and unhcr code patterns into a single alternation
## and keep only the container names matching at least one of them
cl_nm <- c(ct, iso, code) |>
  unique() |>
  paste0(collapse = "|") |>
  grep(cl_nm, value = TRUE)
cl_nm
## Fetch the record of each selected container, then drop the containers
## whose `package_count` is not above zero (i.e. containers without datasets)
cl <- cl_nm |>
  map(ridl_container_show)
bool <- cl |>
  map_lgl(function(x) x$data$package_count > 0)
cl <- cl[bool]
cl
## Checking container (and order them like in the RIDL platform)
## `ord` holds the number of datasets of each container
ord <- map_int(cl, \(x) x$data$package_count)
## A single permutation (largest containers first) is reused for every column
## so that titles, names and counts are guaranteed to stay aligned
by_count <- order(ord, decreasing = TRUE)
## Titles are expected to look like "<country>: <sector>". Only the first ": "
## is used as a separator (extra = "merge") so that a sector containing ": "
## is kept whole, and a title without ": " gets a missing sector without
## raising a warning (fill = "right")
container_df <- tibble(container_title = map_chr(cl[by_count],
                                                 \(x) x$data$title),
                       container_name = map_chr(cl[by_count],
                                                \(x) x$data$name),
                       n_ds = ord[by_count]) |>
  separate(col = "container_title",
           into = c("country", "sector"),
           sep = ": ",
           extra = "merge",
           fill = "right")
### Extract dataset from containers: each container gives a small table with
### the title and name of its datasets plus the name of the container itself;
### the tables are stacked and the list-columns flattened
ridl_data <- cl |>
  map(\(container) {
    pkgs <- container$data$packages
    tibble(dataset_title = map(pkgs, \(pkg) pkg$title),
           dataset_name = map(pkgs, \(pkg) pkg$name),
           container_name = container$data$name)
  }) |>
  bind_rows() |>
  unnest(c(dataset_title, dataset_name))
### Add info from container (country and sector), matched on the container name
ridl_data <- ridl_data |>
  left_join(select(container_df, country, sector, container_name),
            by = "container_name")
glimpse(ridl_data)
### Save the list of datasets as an Excel workbook
out_file <- "./data/wca_ridl_data_21_02_2022.xlsx"
## Make sure the output folder exists: the export fails when ./data is
## missing. Nothing happens (and no warning is shown) if it is already there.
dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)
writexl::write_xlsx(ridl_data, out_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment