|
# Functions for downloading data on Coronavirus cases in the UK by country, |
|
# region, NHS region, and local authority from the government dashboard API. |
|
# |
|
# The developer guide is here: |
|
# |
|
# https://coronavirus.data.gov.uk/developers-guide |
|
# |
|
# The code below is based on the example code shown there. |
|
# |
|
# Run ... |
|
# |
|
# download_datasets() |
|
# |
|
# ... to download data as CSVs into the current directory. |
|
# |
|
# Note that this script only downloads a subset of the available data. You can |
|
# change the area types and metrics selected for download by modifying the |
|
# data requested in download_datasets(). |
|
|
|
# Imports --------------------------------------------------------------------- |
|
|
|
library(dplyr) |
|
library(httr) |
|
library(janitor) |
|
library(jsonlite) |
|
library(purrr) |
|
library(readr) |
|
library(stringr) |
|
|
|
# Functions ------------------------------------------------------------------- |
|
|
|
#' Extracts paginated data by requesting all of the pages
#' and combining the results.
#'
#' @param filters API filters. See the API documentation for
#'  additional information.
#' @param structure Structure parameter. See the API documentation
#'  for additional information.
#' @return A tibble containing data for the given `filters` and `structure`.

get_paginated_data <- function(filters, structure) {

  endpoint <- "https://api.coronavirus.data.gov.uk/v1/data"
  results <- list()
  current_page <- 1

  repeat {

    query <- list(
      filters = paste(filters, collapse = ";"),
      structure = toJSON(structure, auto_unbox = TRUE),
      page = current_page)

    response <- GET(url = endpoint, query = query, timeout(30))

    # Handle errors: 4xx/5xx is fatal; 204 means no (more) data is available
    if (response$status_code >= 400) {
      err_msg <- http_status(response)
      stop(err_msg)
    } else if (response$status_code == 204) {
      break
    }

    # Convert the response body from JSON and store this page's rows.
    # Collecting pages in a list and binding once after the loop avoids
    # the quadratic cost of calling rbind() on a growing data frame.
    json_text <- content(response, "text")
    dt <- fromJSON(json_text)
    results[[current_page]] <- dt$data

    # The API reports the URL of the next page, or NULL on the last page
    if (is.null(dt$pagination$`next`)) break

    current_page <- current_page + 1
  }

  # Combine all pages, standardise column names, and sort for readability
  bind_rows(results) %>%
    as_tibble() %>%
    clean_names() %>%
    mutate(date = as.Date(date)) %>%
    arrange(area_name, date)
}
|
|
|
#' Downloads specific metrics for all areas in a collection of area types.
#'
#' @param dataset_dir The directory into which datasets will be downloaded.
#'  The default is the current directory.
#' @return A named list of tibbles containing the data for each area type.

download_datasets <- function(dataset_dir = ".") {

  # List area types to use as filters
  area_types <- c(
    "nation",
    "region",
    "ltla",
    "nhsRegion")

  # Create the structure as a list of metrics. The names become the
  # column names in the results; the values are the metric names in the
  # API. (The covidOccupiedMVBeds key was previously misspelled as
  # "covidOccuppiedMVBeds", which produced a misspelled output column.)
  structure <- list(
    date = "date",
    areaName = "areaName",
    areaCode = "areaCode",
    newCasesBySpecimenDate = "newCasesBySpecimenDate",
    cumCasesBySpecimenDate = "cumCasesBySpecimenDate",
    cumCasesBySpecimenDateRate = "cumCasesBySpecimenDateRate",
    newAdmissions = "newAdmissions",
    cumAdmissions = "cumAdmissions",
    covidOccupiedMVBeds = "covidOccupiedMVBeds",
    hospitalCases = "hospitalCases")

  # Download the data for each area type and write it to a CSV in
  # dataset_dir. Return results explicitly rather than relying on
  # write_csv() invisibly returning its input.
  data <- map(area_types, function(area_type) {
    filters <- str_glue("areaType={area_type}")
    results <- get_paginated_data(filters, structure)
    filename <- file.path(
      dataset_dir,
      str_glue("coronavirus-cases-{area_type}.csv"))
    write_csv(results, filename)
    results
  })

  names(data) <- area_types
  data
}