|
# Functions for downloading data on Coronavirus cases in the UK by country, |
|
# region, NHS region, and local authority from the government dashboard API. |
|
# |
|
# The developer guide is here: |
|
# |
|
# https://coronavirus.data.gov.uk/developers-guide |
|
# |
|
# The code below is based on the example code shown there. |
|
# |
|
# Run ... |
|
# |
|
# download_datasets() |
|
# |
|
# ... to download data as CSVs into the current directory. |
|
# |
|
# Note that this script only downloads a subset of the available data. You can |
|
# change the area types and metrics selected for download by modifying the |
|
# data requested in download_datasets(). |
|
|
|
# Imports --------------------------------------------------------------------- |
|
|
|
library(dplyr) |
|
library(httr) |
|
library(janitor) |
|
library(jsonlite) |
|
library(purrr) |
|
library(readr) |
|
library(stringr) |
|
|
|
# Functions ------------------------------------------------------------------- |
|
|
|
#' Extracts paginated data by requesting all of the pages
#' and combining the results.
#'
#' @param filters API filters. See the API documentation for
#'  additional information.
#' @param structure Structure parameter. See the API documentation
#'  for additional information.
#' @return A tibble containing data for the given `filters` and `structure`.

get_paginated_data <- function(filters, structure) {

  endpoint <- "https://api.coronavirus.data.gov.uk/v1/data"
  results <- list()
  current_page <- 1

  repeat {

    query <- list(
      filters = paste(filters, collapse = ";"),
      structure = toJSON(structure, auto_unbox = TRUE),
      page = current_page)

    response <- GET(url = endpoint, query = query, timeout(30))

    # Handle errors: 4xx/5xx is fatal; 204 means no (more) data is available
    if (response$status_code >= 400) {
      err_msg <- http_status(response)
      stop(err_msg)
    } else if (response$status_code == 204) {
      break
    }

    # Convert the response body from JSON and store this page's rows.
    # Collecting pages in a list and binding once after the loop avoids
    # the quadratic cost of calling rbind() on a growing data frame.
    json_text <- content(response, "text")
    dt <- fromJSON(json_text)
    results[[current_page]] <- dt$data

    # The API reports the URL of the next page, or NULL on the last page
    if (is.null(dt$pagination$`next`)) break

    current_page <- current_page + 1
  }

  # Combine all pages, standardise column names, and sort for readability
  bind_rows(results) %>%
    as_tibble() %>%
    clean_names() %>%
    mutate(date = as.Date(date)) %>%
    arrange(area_name, date)
}
|
|
|
#' Downloads specific metrics for all areas in a collection of area types.
#'
#' @param dataset_dir The directory into which datasets will be downloaded.
#'  The default is the current directory.
#' @return A named list of tibbles containing the data for each area type.

download_datasets <- function(dataset_dir = ".") {

  # List area types to use as filters
  area_types <- c(
    "nation",
    "region",
    "ltla",
    "nhsRegion")

  # Create the structure as a list of metrics. The names become the
  # column names in the results; the values are the metric names in the
  # API. (The covidOccupiedMVBeds key was previously misspelled as
  # "covidOccuppiedMVBeds", which produced a misspelled output column.)
  structure <- list(
    date = "date",
    areaName = "areaName",
    areaCode = "areaCode",
    newCasesBySpecimenDate = "newCasesBySpecimenDate",
    cumCasesBySpecimenDate = "cumCasesBySpecimenDate",
    cumCasesBySpecimenDateRate = "cumCasesBySpecimenDateRate",
    newAdmissions = "newAdmissions",
    cumAdmissions = "cumAdmissions",
    covidOccupiedMVBeds = "covidOccupiedMVBeds",
    hospitalCases = "hospitalCases")

  # Download the data for each area type and write it to a CSV in
  # dataset_dir. Return results explicitly rather than relying on
  # write_csv() invisibly returning its input.
  data <- map(area_types, function(area_type) {
    filters <- str_glue("areaType={area_type}")
    results <- get_paginated_data(filters, structure)
    filename <- file.path(
      dataset_dir,
      str_glue("coronavirus-cases-{area_type}.csv"))
    write_csv(results, filename)
    results
  })

  names(data) <- area_types
  data
}