Skip to content

Instantly share code, notes, and snippets.

@jakeybob
Created June 5, 2021 19:54
Show Gist options
  • Save jakeybob/93e7bf0600cba83475146099e3ccf102 to your computer and use it in GitHub Desktop.
Save jakeybob/93e7bf0600cba83475146099e3ccf102 to your computer and use it in GitHub Desktop.
Scotland age/sex interpolated population estimates/projections
library(tidyverse)
library(janitor)
library(lubridate)
library(zoo)
library(ckanr)
# generates age/sex population lookup for Scotland based on NRS mid-year
# population estimates/projections, interpolated linearly to day level
# (July 2nd used as mid-year ref date)
# NOTE: oldest age group will be composite e.g. "90+"
ckanr_setup(url = "https://www.opendata.nhs.scot/")
res_est <- resource_show(id = "27a72cc8-d6d8-430c-8b4f-3109a9ceadb1") # pop estimates
res_proj <- resource_show(id = "0876fc67-05e6-4e87-bc30-c4b0756fff04") # pop projections
# national population estimates (retrospective)
data_est <- ckan_fetch(x=res_est$url) %>%
clean_names() %>%
filter(hb == "S92000003",
sex != "All") %>%
select(year, sex, contains("age"), -contains("ages"))
# national population projections (for years with no estimates available)
data_proj <- ckan_fetch(x=res_proj$url) %>%
clean_names() %>%
filter(hb == "S92000003",
sex != "All") %>%
select(year, sex, contains("age"), -contains("ages")) %>%
filter(year %in% unique(data_est$year) == FALSE)
# combine and format estimate and projections
data <- data_est %>%
bind_rows(data_proj) %>%
distinct() %>%
mutate(sex = tolower(sex)) %>%
arrange(year, sex) %>%
pivot_longer(starts_with("age"), names_to = "age", values_to = "pop") %>%
mutate(age_desc = str_remove(age, "age")) %>%
mutate(age = as.integer(str_extract(age, "\\d+"))) %>%
mutate(date = dmy(paste0("02/07/", year))) %>%
select(-year)
# create df of all date/sex/age combinations, join on the population data,
# then linearly interpolate between all the mid-year points
df <- crossing(date = seq.Date(from = dmy(paste0("02/07/", min(data_est$year, data_proj$year))),
to = dmy(paste0("02/07/", max(data_est$year, data_proj$year))),
by = "day"),
sex = unique(data$sex),
age = unique(data$age)) %>%
left_join(data) %>%
group_by(sex, age) %>%
mutate(pop = na.approx(pop)) %>%
ungroup() %>%
select(date, sex, age, age_desc, pop)
# output data
write_rds(df %>% filter(date >= dmy("01/01/2015"), date <= dmy("01/01/2023")),
"scot_pop_daily.rds", compress = "gz")
# quick plot test
df %>%
filter(date >= dmy("01/01/2015"), date <= today()) %>%
filter(age %in% c(10:25)) %>%
# filter(age > 80) %>%
ggplot(aes(x = date, y = pop, colour = sex)) +
geom_line() +
facet_wrap(~age)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment