Skip to content

Instantly share code, notes, and snippets.

@avallecam
Last active May 31, 2024 14:04
Show Gist options
  • Save avallecam/9a09c69263ae2a4cedae7d45540e0e74 to your computer and use it in GitHub Desktop.
Save avallecam/9a09c69263ae2a4cedae7d45540e0e74 to your computer and use it in GitHub Desktop.
functions from {epiparameter} and {epidemics} get tibble-friendly ouputs with list columns from nested data or multiple iterations made internally
``` r
library(tidyverse)
# epiparameter ------------------------------------------------------------
library(epiparameter)
# get set of epidist objects
epidist_input <- epiparameter::epidist_db(
disease = "covid",
epi_dist = "serial"
)
#> Returning 4 results that match the criteria (3 are parameterised).
#> Use subset to filter by entry variables or single_epidist to return a single entry.
#> To retrieve the citation for each use the 'get_citation' function
# table output
epidist_input %>% epiparameter::parameter_tbl()
#> # Parameter table:
#> # A data frame: 4 × 7
#> disease pathogen epi_distribution prob_distribution author year sample_size
#> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
#> 1 COVID-19 SARS-CoV… serial interval <NA> Alene… 2021 3924
#> 2 COVID-19 SARS-CoV… serial interval lnorm Nishi… 2020 28
#> 3 COVID-19 SARS-CoV… serial interval weibull Nishi… 2020 18
#> 4 COVID-19 SARS-CoV… serial interval norm Yang … 2020 131
# tibble output
epidist_tibble <- epidist_input %>%
as_tibble()
#> Warning: In as.data.frame.multi_epidist(value, stringsAsFactors = FALSE) :
#> extra argument 'stringsAsFactors' will be disregarded
## * list columns + tidyr unnest ---------------------------------------------
# list columns
# prob_distribution:method_assess
epidist_tibble %>%
dplyr::select(disease,epi_distribution,citation,metadata)
#> # A tibble: 4 × 4
#> disease epi_distribution citation metadata
#> <chr> <chr> <I<list>> <I<list>>
#> 1 COVID-19 serial interval <named list [6]> <named list [6]>
#> 2 COVID-19 serial interval <named list [6]> <named list [6]>
#> 3 COVID-19 serial interval <named list [6]> <named list [6]>
#> 4 COVID-19 serial interval <named list [6]> <named list [6]>
# unnest metadata
epidist_tibble %>%
dplyr::select(disease,epi_distribution,citation,metadata) %>%
tidyr::unnest_wider(col = metadata)
#> # A tibble: 4 × 9
#> disease epi_distribution citation sample_size region transmission_mode
#> <chr> <chr> <I<list>> <int> <chr> <chr>
#> 1 COVID-19 serial interval <named list> 3924 Mixed natural_natural_…
#> 2 COVID-19 serial interval <named list> 28 Mixed natural_natural_…
#> 3 COVID-19 serial interval <named list> 18 Mixed natural_natural_…
#> 4 COVID-19 serial interval <named list> 131 Hubei pr… natural_natural_…
#> # ℹ 3 more variables: vector <chr>, extrinsic <lgl>, inference_method <chr>
# epidemics ---------------------------------------------------------------
library(epidemics)
#> Warning: package 'epidemics' was built under R version 4.3.3
## contact data ----------------------------------------
polymod <- socialmixr::polymod
contact_data <- socialmixr::contact_matrix(
survey = polymod,
countries = "United Kingdom",
age.limits = c(0, 20, 40),
symmetric = TRUE
)
#> Using POLYMOD social contact data. To cite this in a publication, use the 'get_citation()' function
#> Removing participants that have contacts without age information. To change this behaviour, set the 'missing.contact.age' option
# prepare contact matrix
contact_matrix <- t(contact_data$matrix)
contact_matrix
#>
#> contact.age.group [,1] [,2] [,3]
#> [0,20) 7.883663 2.794154 1.565665
#> [20,40) 3.120220 4.854839 2.624868
#> 40+ 3.063895 4.599893 5.005571
demography_vector <- contact_data$demography$population
names(demography_vector) <- rownames(contact_matrix)
demography_vector
#> [0,20) [20,40) 40+
#> 14799290 16526302 28961159
## initial conditions ----------------------------------------
initial_i <- 1e-6
initial_conditions_inf <- c(
S = 1 - initial_i, E = 0, I = initial_i, R = 0, V = 0
)
# combine the initial conditions
initial_conditions <- rbind(
initial_conditions_inf, # age group 1
initial_conditions_inf, # age group 2
initial_conditions_inf # age group 3
)
# use contact matrix to assign age group names
rownames(initial_conditions) <- rownames(contact_matrix)
initial_conditions
#> S E I R V
#> [0,20) 0.999999 0 1e-06 0 0
#> [20,40) 0.999999 0 1e-06 0 0
#> 40+ 0.999999 0 1e-06 0 0
## population data ----------------------------------------
uk_population <- epidemics::population(
name = "UK",
contact_matrix = contact_matrix,
demography_vector = demography_vector,
initial_conditions = initial_conditions
)
## rates ----------------------------------------
# time periods
preinfectious_period <- 3.0
infectious_period <- 7.0
# rates
infectiousness_rate <- 1.0 / preinfectious_period
recovery_rate <- 1.0 / infectious_period
# reproduction number
# get R mean and sd over time
r_estimate_mean <- 1.5
r_estimate_sd <- 0.05
# Generate 100 R samples
r_samples <- withr::with_seed(
seed = 1,
rnorm(
n = 100, mean = r_estimate_mean, sd = r_estimate_sd
)
)
beta <- r_samples / infectious_period
## run epidemics ----------------------------------------
# run model default
output_samples <- epidemics::model_default(
population = uk_population,
transmission_rate = beta,
infectiousness_rate = infectiousness_rate,
recovery_rate = recovery_rate,
time_end = 600, increment = 1
)
## * list columns + tidyr unnest ---------------------------------------------
# list columns
# population:time_dependence, scenario, data
output_samples %>%
as_tibble()
#> # A tibble: 100 × 12
#> transmission_rate infectiousness_rate recovery_rate time_end param_set
#> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 0.210 0.333 0.143 600 1
#> 2 0.216 0.333 0.143 600 2
#> 3 0.208 0.333 0.143 600 3
#> 4 0.226 0.333 0.143 600 4
#> 5 0.217 0.333 0.143 600 5
#> 6 0.208 0.333 0.143 600 6
#> 7 0.218 0.333 0.143 600 7
#> 8 0.220 0.333 0.143 600 8
#> 9 0.218 0.333 0.143 600 9
#> 10 0.212 0.333 0.143 600 10
#> # ℹ 90 more rows
#> # ℹ 7 more variables: population <list>, intervention <list>,
#> # vaccination <list>, time_dependence <list>, increment <dbl>,
#> # scenario <int>, data <list>
# unnest data
output_samples %>%
dplyr::mutate(r_value = r_samples) %>%
tidyr::unnest(data)
#> # A tibble: 901,500 × 16
#> transmission_rate infectiousness_rate recovery_rate time_end param_set
#> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 0.210 0.333 0.143 600 1
#> 2 0.210 0.333 0.143 600 1
#> 3 0.210 0.333 0.143 600 1
#> 4 0.210 0.333 0.143 600 1
#> 5 0.210 0.333 0.143 600 1
#> 6 0.210 0.333 0.143 600 1
#> 7 0.210 0.333 0.143 600 1
#> 8 0.210 0.333 0.143 600 1
#> 9 0.210 0.333 0.143 600 1
#> 10 0.210 0.333 0.143 600 1
#> # ℹ 901,490 more rows
#> # ℹ 11 more variables: population <list>, intervention <list>,
#> # vaccination <list>, time_dependence <list>, increment <dbl>,
#> # scenario <int>, time <dbl>, demography_group <chr>, compartment <chr>,
#> # value <dbl>, r_value <dbl>
```
<sup>Created on 2024-05-31 with [reprex v2.1.0](https://reprex.tidyverse.org)</sup>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment