seabbs/forecast-covid-deaths-from-covid-cases.R

## forecast-covid-deaths-from-covid-cases.R
# set number of cores to use fitting the model
# no benefit on runtime if cores > chains which is set to 4 by default
options(mc.cores = 4)

# Packages ----------------------------------------------------------------
# install.packages(c("data.table", "remotes", "ggplot2"))
# remotes::install_github("epiforecasts/EpiNow2")
# remotes::install_github("epiforecasts/covidregionaldata")
library(data.table)
library(ggplot2)
library(EpiNow2)
library(covidregionaldata)

# Data --------------------------------------------------------------------

# target country (must be present in ECDC data)
country <- "us"

# precleaned ECDC data - alternative is to bring your own
reported_cases <- covidregionaldata::get_national_data(country, source = "ecdc")
reported_cases <- data.table::setDT(reported_cases)
reported_cases <- reported_cases[, .(date, primary = cases_new, secondary = deaths_new)]
# filter to the last 3 months of data
reported_cases <- reported_cases[date >= (max(date) - 12*7)]
train <- reported_cases[date < (max(date) - 7*2)]
test <- reported_cases[date >= (max(date) - 7*2)]

# Estimate relationship between cases and deaths --------------------------
# estimate the relationship between deaths and cases using just the last 6 weeks of data
# here we use a prior with a mean of 14 days and a standard deviation of 7 days for the
# delay between case report and death
cases_to_deaths <- estimate_secondary(train[date >= (max(date) - 7*6)],
                                      delays = delay_opts(list(mean = 2.5, mean_sd = 0.2,
                                                               sd = 0.47, sd_sd = 0.1, max = 21)),
                                      secondary = secondary_opts(type = "incidence"),
                                      obs = obs_opts(scale = list(mean = 0.01, sd = 0.0025)),
                                      control = list(adapt_delta = 0.95))
plot(cases_to_deaths)
ggsave("cases_to_deaths_fit.png")

# Forecast using observed hold out cases ----------------------------------
deaths_forecast <- forecast_secondary(cases_to_deaths, copy(test)[, .(date, value = primary)])
plot(deaths_forecast, new_obs = reported_cases, from = min(test$date))
ggsave("cases_to_deaths_forecast.png")

# Forecast cases from train -----------------------------------------------
# uses latent infection Rt model - any forecasting approach could be used here.

# literature distributions - please reach out if there are others you think should be supported
generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer")

# define reporting delay as lognormal with mean of 4 days and sd of 1 day in absence of
# evidence. If data on onset -> report then can use estimate_delay to estimate the delay
reporting_delay <- list(mean = convert_to_logmean(4, 1),
                        mean_sd = 0.1,
                        sd = convert_to_logsd(4, 1),
                        sd_sd = 0.1,
                        max = 15)

# use a weekly random walk to speed up compute of Rt model
# assume future case Rt is constant
case_forecast <- epinow(reported_cases = copy(train)[, .(date, confirm = primary)],
                        generation_time = generation_time,
                        delays = delay_opts(incubation_period, reporting_delay),
                        rt = rt_opts(prior = list(mean = 1.5, sd = 0.5), rw = 7),
                        gp = NULL, horizon = 21)
plot(case_forecast)
ggsave("cases_forecast.png")

# Forecast deaths using forecast cases ------------------------------------
# optionally here any other forecast model could be used as long as it produces posterior
# samples or equivalent (in which cases primary should be a data frame with data, sample, and
# value variables).
deaths_unknown_case_forecast <- forecast_secondary(cases_to_deaths, case_forecast$estimates)
plot(deaths_unknown_case_forecast, new_obs = reported_cases,
     from = min(test$date), to = max(test$date))
ggsave("unknown_cases_to_deaths_forecast.png")
	# set number of cores to use fitting the model
	# no benefit on runtime if cores > chains which is set to 4 by default
	options(mc.cores = 4)

	# Packages ----------------------------------------------------------------
	# install.packages(c("data.table", "remotes", "ggplot2"))
	# remotes::install_github("epiforecasts/EpiNow2")
	# remotes::install_github("epiforecasts/covidregionaldata")
	library(data.table)
	library(ggplot2)
	library(EpiNow2)
	library(covidregionaldata)

	# Data --------------------------------------------------------------------

	# target country (must be present in ECDC data)
	country <- "us"

	# precleaned ECDC data - alternative is to bring your own
	reported_cases <- covidregionaldata::get_national_data(country, source = "ecdc")
	reported_cases <- data.table::setDT(reported_cases)
	reported_cases <- reported_cases[, .(date, primary = cases_new, secondary = deaths_new)]
	# filter to the last 3 months of data
	reported_cases <- reported_cases[date >= (max(date) - 12*7)]
	train <- reported_cases[date < (max(date) - 7*2)]
	test <- reported_cases[date >= (max(date) - 7*2)]

	# Estimate relationship between cases and deaths --------------------------
	# estimate the relationship between deaths and cases using just the last 6 weeks of data
	# here we use a prior with a mean of 14 days and a standard deviation of 7 days for the
	# delay between case report and death
	cases_to_deaths <- estimate_secondary(train[date >= (max(date) - 7*6)],
	delays = delay_opts(list(mean = 2.5, mean_sd = 0.2,
	sd = 0.47, sd_sd = 0.1, max = 21)),
	secondary = secondary_opts(type = "incidence"),
	obs = obs_opts(scale = list(mean = 0.01, sd = 0.0025)),
	control = list(adapt_delta = 0.95))
	plot(cases_to_deaths)
	ggsave("cases_to_deaths_fit.png")

	# Forecast using observed hold out cases ----------------------------------
	deaths_forecast <- forecast_secondary(cases_to_deaths, copy(test)[, .(date, value = primary)])
	plot(deaths_forecast, new_obs = reported_cases, from = min(test$date))
	ggsave("cases_to_deaths_forecast.png")

	# Forecast cases from train -----------------------------------------------
	# uses latent infection Rt model - any forecasting approach could be used here.

	# literature distributions - please reach out if there are others you think should be supported
	generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
	incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer")

	# define reporting delay as lognormal with mean of 4 days and sd of 1 day in absence of
	# evidence. If data on onset -> report then can use estimate_delay to estimate the delay
	reporting_delay <- list(mean = convert_to_logmean(4, 1),
	mean_sd = 0.1,
	sd = convert_to_logsd(4, 1),
	sd_sd = 0.1,
	max = 15)

	# use a weekly random walk to speed up compute of Rt model
	# assume future case Rt is constant
	case_forecast <- epinow(reported_cases = copy(train)[, .(date, confirm = primary)],
	generation_time = generation_time,
	delays = delay_opts(incubation_period, reporting_delay),
	rt = rt_opts(prior = list(mean = 1.5, sd = 0.5), rw = 7),
	gp = NULL, horizon = 21)
	plot(case_forecast)
	ggsave("cases_forecast.png")

	# Forecast deaths using forecast cases ------------------------------------
	# optionally here any other forecast model could be used as long as it produces posterior
	# samples or equivalent (in which cases primary should be a data frame with data, sample, and
	# value variables).
	deaths_unknown_case_forecast <- forecast_secondary(cases_to_deaths, case_forecast$estimates)
	plot(deaths_unknown_case_forecast, new_obs = reported_cases,
	from = min(test$date), to = max(test$date))
	ggsave("unknown_cases_to_deaths_forecast.png")