Last active
November 14, 2023 18:28
-
-
Save seabbs/4f09d7609df298db7a86c31612ff9d17 to your computer and use it in GitHub Desktop.
Example of using EpiNow2 to forecast Covid-19 deaths from Covid-19 cases (both observed and forecast) for a country in the ECDC dataset. See the documentation for more details: https://epiforecasts.io/EpiNow2/dev/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# set number of cores to use fitting the model | |
# no benefit on runtime if cores > chains which is set to 4 by default | |
options(mc.cores = 4) | |
# Packages ---------------------------------------------------------------- | |
# install.packages(c("data.table", "remotes", "ggplot2")) | |
# remotes::install_github("epiforecasts/EpiNow2") | |
# remotes::install_github("epiforecasts/covidregionaldata") | |
library(data.table) | |
library(ggplot2) | |
library(EpiNow2) | |
library(covidregionaldata) | |
# Data -------------------------------------------------------------------- | |
# target country (must be present in ECDC data) | |
country <- "us" | |
# precleaned ECDC data - alternative is to bring your own | |
reported_cases <- covidregionaldata::get_national_data(country, source = "ecdc") | |
reported_cases <- data.table::setDT(reported_cases) | |
reported_cases <- reported_cases[, .(date, primary = cases_new, secondary = deaths_new)] | |
# filter to the last 3 months of data | |
reported_cases <- reported_cases[date >= (max(date) - 12*7)] | |
train <- reported_cases[date < (max(date) - 7*2)] | |
test <- reported_cases[date >= (max(date) - 7*2)] | |
# Estimate relationship between cases and deaths -------------------------- | |
# estimate the relationship between deaths and cases using just the last 6 weeks of data | |
# here we use a prior with a mean of 14 days and a standard deviation of 7 days for the | |
# delay between case report and death | |
cases_to_deaths <- estimate_secondary(train[date >= (max(date) - 7*6)], | |
delays = delay_opts(list(mean = 2.5, mean_sd = 0.2, | |
sd = 0.47, sd_sd = 0.1, max = 21)), | |
secondary = secondary_opts(type = "incidence"), | |
obs = obs_opts(scale = list(mean = 0.01, sd = 0.0025)), | |
control = list(adapt_delta = 0.95)) | |
plot(cases_to_deaths) | |
ggsave("cases_to_deaths_fit.png") | |
# Forecast using observed hold out cases ---------------------------------- | |
deaths_forecast <- forecast_secondary(cases_to_deaths, copy(test)[, .(date, value = primary)]) | |
plot(deaths_forecast, new_obs = reported_cases, from = min(test$date)) | |
ggsave("cases_to_deaths_forecast.png") | |
# Forecast cases from train ----------------------------------------------- | |
# uses latent infection Rt model - any forecasting approach could be used here. | |
# literature distributions - please reach out if there are others you think should be supported | |
generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani") | |
incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer") | |
# define reporting delay as lognormal with mean of 4 days and sd of 1 day in absence of | |
# evidence. If data on onset -> report then can use estimate_delay to estimate the delay | |
reporting_delay <- list(mean = convert_to_logmean(4, 1), | |
mean_sd = 0.1, | |
sd = convert_to_logsd(4, 1), | |
sd_sd = 0.1, | |
max = 15) | |
# use a weekly random walk to speed up compute of Rt model | |
# assume future case Rt is constant | |
case_forecast <- epinow(reported_cases = copy(train)[, .(date, confirm = primary)], | |
generation_time = generation_time, | |
delays = delay_opts(incubation_period, reporting_delay), | |
rt = rt_opts(prior = list(mean = 1.5, sd = 0.5), rw = 7), | |
gp = NULL, horizon = 21) | |
plot(case_forecast) | |
ggsave("cases_forecast.png") | |
# Forecast deaths using forecast cases ------------------------------------ | |
# optionally here any other forecast model could be used as long as it produces posterior | |
# samples or equivalent (in which cases primary should be a data frame with data, sample, and | |
# value variables). | |
deaths_unknown_case_forecast <- forecast_secondary(cases_to_deaths, case_forecast$estimates) | |
plot(deaths_unknown_case_forecast, new_obs = reported_cases, | |
from = min(test$date), to = max(test$date)) | |
ggsave("unknown_cases_to_deaths_forecast.png") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Forecasting Covid-19 deaths in the US using Covid-19 test positive cases
Performance on a short time horizon seems fairly good as expected but care needs to be taken to not fit to overly long time series when estimating the relationship between deaths and cases as the impact of changing testing can introduce bias. This can likely be mitigated by fitting to a more stable primary predictor such as hospital admissions though this may also induce bias in the presence of time-varying community deaths. Code available here.
Model fit of Covid-19 deaths predicted by test positive Covid-19 cases in the US
Forecast performance on a held out sample of Covid-19 deaths but with test positive cases known in the forecast window. This therefore represents a best case for performance.
Summary of a forecast for test positive Covid-19 cases using the package default Rt model but with a weekly random walk on Rt rather than a Gaussian process and a fixed Rt beyond the forecast horizon.
Forecast performance on a held out sample of Covid-19 deaths using the forecast for test positive Covid-19 cases (i.e a real use case where future test positive cases are unknown).