## dickoa/get_ridl_metadata_wca.R

## get_ridl_metadata_wca.R
library(tidyverse)
library(janitor)
library(ridl) ## remotes::install_gitlab("dickoa/ridl")
library(popdata) ## remotes::install_gitlab("dickoa/popdata")

### Get the list of countries name, iso3 and unhcr code
## Rows of `pd_countries` belonging to the West and Central Africa bureau.
## Filtered once here and reused for the three identifier columns below.
wca <- filter(pd_countries, bureau == "West and Central Africa")

## Turn a vector of identifiers into URL-style slugs: clean them with
## make_clean_names(), then replace every "_" with "-".
as_slug <- function(x) {
  str_replace_all(make_clean_names(x), "\\_", "\\-")
}

## Country short names, anchored at the start of the container name
ct <- paste0("^", as_slug(pull(wca, nameShort)))
ct <- c(ct, "^registration$") # registration is the url of "Cameroon: registration"

## iso codes, anchored at the start and followed by a hyphen
iso <- paste0("^", as_slug(pull(wca, iso)), "-")
iso <- c(iso, "^car$") # car is the url for Central African Rep.

## adding unhcr code just in case
code <- paste0("^", as_slug(pull(wca, code)), "-")

###
## Container names returned by ridl_container_list() (printed for inspection)
cl_nm <- ridl_container_list()
cl_nm

## Keep only the names matching one of the country-name, iso or unhcr-code
## patterns; the de-duplicated patterns are OR-ed into a single regex
wca_regex <- paste0(unique(c(ct, iso, code)), collapse = "|")
cl_nm <- grep(pattern = wca_regex, x = cl_nm, value = TRUE)
cl_nm


## Fetch every selected container with ridl_container_show(), then keep
## only those whose package_count is greater than zero
cl <- map(cl_nm, ridl_container_show)
has_datasets <- map_lgl(cl, function(container) {
  container$data$package_count > 0
})
cl <- cl[has_datasets]
cl


## Checking container (and order them like in the RIDL platform),
## i.e. from the largest to the smallest package_count
ord <- map_int(cl, \(x) x$data$package_count)
by_count <- order(ord, decreasing = TRUE)
cl_sorted <- cl[by_count]
container_df <- tibble(
  container_title = map_chr(cl_sorted, \(x) x$data$title),
  container_name = map_chr(cl_sorted, \(x) x$data$name),
  n_ds = sort(ord, decreasing = TRUE)
)
## Titles are expected to look like "<country>: <sector>"; split on ": "
container_df <- separate(container_df,
                         col = "container_title",
                         into = c("country", "sector"),
                         sep = ": ")

### Extract dataset from containers
## For one container, build a tibble holding the title and name of each of
## its packages (as list-columns) plus the container's own name
list_datasets <- function(container) {
  pkgs <- container$data$packages
  tibble(dataset_title = map(pkgs, \(pkg) pkg$title),
         dataset_name = map(pkgs, \(pkg) pkg$name),
         container_name = container$data$name)
}

## Stack the per-container tibbles, then flatten the two list-columns
ridl_data <- map_dfr(cl, list_datasets)
ridl_data <- unnest(ridl_data, c(dataset_title, dataset_name))


### Add info from container
## Bring the country and sector columns of container_df onto each dataset
## row, matching on container_name
container_info <- container_df |>
  select(country, sector, container_name)
ridl_data <- ridl_data |>
  left_join(container_info, by = "container_name")

## Print a compact overview of the joined table
glimpse(ridl_data)

###
## Export the dataset listing to an Excel workbook
out_file <- "./data/wca_ridl_data_21_02_2022.xlsx"
## Make sure the target folder exists before writing; the export would
## otherwise fail on a fresh checkout without a ./data directory
dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)
writexl::write_xlsx(ridl_data, out_file)
	## NOTE(review): this tab-indented section repeats the pipeline that
	## appears earlier in this file (it looks like a second paste of the same
	## script whose pipes had been escaped as `\|>`, which is not valid R).
	## The escapes are repaired here so the file parses; running the file
	## still executes every RIDL request twice and rewrites the same
	## workbook, so consider deleting one of the two copies.
	library(tidyverse)
	library(janitor)
	library(ridl) ## remotes::install_gitlab("dickoa/ridl")
	library(popdata) ## remotes::install_gitlab("dickoa/popdata")

	### Get the list of countries name, iso3 and unhcr code
	## Country short names of the West and Central Africa bureau, cleaned,
	## with "_" replaced by "-" and anchored at the start
	ct <- filter(pd_countries,
	             bureau == "West and Central Africa") |>
	  pull(nameShort)
	ct <- make_clean_names(ct)
	ct <- str_replace_all(ct, "\\_", "\\-")
	ct <- paste0("^", ct)
	ct <- c(ct, "^registration$") # registration is the url of "Cameroon: registration"

	## Same treatment for the iso column, with a trailing hyphen
	iso <- filter(pd_countries,
	              bureau == "West and Central Africa") |>
	  pull(iso)
	iso <- make_clean_names(iso)
	iso <- str_replace_all(iso, "\\_", "\\-")
	iso <- paste0("^", iso, "-")
	iso <- c(iso, "^car$") # car is the url for Central African Rep.

	## adding unhcr code just in case
	code <- filter(pd_countries,
	               bureau == "West and Central Africa") |>
	  pull(code)
	code <- make_clean_names(code)
	code <- str_replace_all(code, "\\_", "\\-")
	code <- paste0("^", code, "-")

	### List the containers (printed for inspection)
	cl_nm <- ridl_container_list()
	cl_nm

	## Filter the container name to select the one matching name, iso3 and unhcr code
	cl_nm <- grep(paste0(unique(c(ct, iso, code)), collapse = "|"),
	              cl_nm,
	              value = TRUE)
	cl_nm


	## Filter container to remove container without datasets
	cl <- map(cl_nm, ridl_container_show)
	bool <- map_lgl(cl, \(container) container$data$package_count > 0)
	cl <- cl[bool]
	cl


	## Checking container (and order them like in the RIDL platform)
	ord <- map_int(cl, \(x) x$data$package_count)
	container_df <- tibble(container_title = map_chr(cl[order(ord, decreasing = TRUE)],
	                                                 \(x) x$data$title),
	                       container_name = map_chr(cl[order(ord, decreasing = TRUE)],
	                                                \(x) x$data$name),
	                       n_ds = sort(ord, decreasing = TRUE)) |>
	  separate(col = "container_title",
	           into = c("country", "sector"),
	           sep = ": ")

	### Extract dataset from containers
	## Title and name of every package are collected as list-columns and
	## flattened by unnest()
	ridl_data <- map_dfr(cl, \(container)
	                     tibble(dataset_title = map(container$data$packages,
	                                                \(pkg) pkg$title),
	                            dataset_name = map(container$data$packages,
	                                               \(pkg) pkg$name),
	                            container_name = container$data$name)) |>
	  unnest(c(dataset_title, dataset_name))


	### Add info from container
	ridl_data <- left_join(ridl_data,
	                       select(container_df, country,
	                              sector, container_name),
	                       by = "container_name")

	glimpse(ridl_data)

	### Export the dataset listing to an Excel workbook
	writexl::write_xlsx(ridl_data, "./data/wca_ridl_data_21_02_2022.xlsx")