Skip to content

Instantly share code, notes, and snippets.

@olihawkins
Last active June 10, 2021 08:50
Show Gist options
  • Save olihawkins/6962c6df563e3dbea8917dbf6fd4ab01 to your computer and use it in GitHub Desktop.
Save olihawkins/6962c6df563e3dbea8917dbf6fd4ab01 to your computer and use it in GitHub Desktop.
Download data on Coronavirus cases and deaths from the government dashboard API
# Functions for downloading data on Coronavirus cases in the UK by country,
# region, NHS region, and local authority from the government dashboard API.
#
# The developer guide is here:
#
# https://coronavirus.data.gov.uk/developers-guide
#
# The code below is based on the example code shown there.
#
# Run ...
#
# download_datasets()
#
# ... to download data as CSVs into the current directory.
#
# Note that this script only downloads a subset of the available data. You can
# change the area types and metrics selected for download by modifying the
# data requested in download_datasets().
# Imports ---------------------------------------------------------------------
library(dplyr)
library(httr)
library(janitor)
library(jsonlite)
library(purrr)
library(readr)
library(stringr)
# Functions -------------------------------------------------------------------
#' Extract paginated data from the dashboard API by requesting all of the
#' pages and combining the results.
#'
#' @param filters API filters. See the API documentation for
#'   additional information.
#' @param structure Structure parameter. See the API documentation
#'   for additional information.
#' @return A tibble containing the data for the given \code{filters} and
#'   \code{structure}, cleaned with snake_case column names and sorted by
#'   area name and date.
get_paginated_data <- function(filters, structure) {

  endpoint <- "https://api.coronavirus.data.gov.uk/v1/data"
  pages <- list()
  current_page <- 1

  repeat {

    query <- list(
      filters = paste(filters, collapse = ";"),
      structure = toJSON(structure, auto_unbox = TRUE),
      page = current_page)

    response <- GET(url = endpoint, query = query, timeout(30))

    # Stop on client/server errors; 204 means no content, so stop paging
    if (response$status_code >= 400) {
      err_msg <- http_status(response)
      stop(err_msg)
    } else if (response$status_code == 204) {
      break
    }

    # Parse the JSON payload and accumulate this page's data; collecting
    # pages in a list and binding once at the end avoids the O(n^2) cost
    # of growing a data frame with rbind() inside the loop
    json_text <- content(response, "text")
    dt <- fromJSON(json_text)
    pages[[current_page]] <- dt$data

    # The API signals the last page with a NULL "next" link
    if (is.null(dt$pagination$`next`)) break
    current_page <- current_page + 1
  }

  pages %>%
    bind_rows() %>%
    as_tibble() %>%
    clean_names() %>%
    mutate(date = as.Date(date)) %>%
    arrange(area_name, date)
}
#' Download specific metrics for all areas in a collection of area types
#' and write each area type's data to a CSV file.
#'
#' @param dataset_dir The directory into which datasets will be downloaded.
#'   The default is the current directory.
#' @return A named list of tibbles containing the data for each area type.
download_datasets <- function(dataset_dir = ".") {

  # Area types to use as API filters: one dataset is downloaded per type
  area_types <- c(
    "nation",
    "region",
    "ltla",
    "nhsRegion")

  # The structure maps output column names to API metric names
  structure <- list(
    date = "date",
    areaName = "areaName",
    areaCode = "areaCode",
    newCasesBySpecimenDate = "newCasesBySpecimenDate",
    cumCasesBySpecimenDate = "cumCasesBySpecimenDate",
    cumCasesBySpecimenDateRate = "cumCasesBySpecimenDateRate",
    newAdmissions = "newAdmissions",
    cumAdmissions = "cumAdmissions",
    # Fixed misspelling: the output column was previously named
    # "covidOccuppiedMVBeds" while the API metric is "covidOccupiedMVBeds"
    covidOccupiedMVBeds = "covidOccupiedMVBeds",
    hospitalCases = "hospitalCases")

  # Fetch, save and collect the data for each area type
  data <- map(area_types, function(area_type) {
    filters <- str_glue("areaType={area_type}")
    results <- get_paginated_data(filters, structure)
    filename <- file.path(
      dataset_dir,
      str_glue("coronavirus-cases-{area_type}.csv"))
    write_csv(results, filename)
    # Return the tibble explicitly rather than relying on write_csv()
    # returning its input invisibly
    results
  })

  names(data) <- area_types
  data
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment