This was code originally written by Nick Golding, in August 2022.
It was initially written to demonstrate using symmetrical terms and the benefit of them in conmat.
They handily demonstrate a nice plot of the different terms in a gam - effectively giving us a "contact matrix" of the effect of each term. The part where that happens is where predict
is used inside mutate.
library(mgcv)
#> Loading required package: nlme
#> This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
library(tidyverse)
library(ggplot2)
# the fake function we would like to mirror
f <- function(age) {
points <- tribble(
~x, ~y,
0, 0.1,
5, 0.1,
10, 0.1,
15, 0.1,
20, 0.1,
23, 0.5,
24, 0.7,
25, 0.9,
27, 0.9,
30, 0.9,
35, 0.9,
40, 0.9,
45, 0.9,
48, 0.9,
50, 0.9,
51, 0.7,
52, 0.5,
55, 0.1,
60, 0.1,
65, 0.1,
70, 0.1,
75, 0.1,
80, 0.1
)
interp <- splinefun(x = points$x,
y = points$y,
method = "periodic")
interp(age)
}
g <- function(age_from, age_to) {
sin(age_from / 10 - 1) * sin(age_to / 10 - 1)
}
h <- function(age_from, age_to) {
-0.1 * exp(sin(abs(age_from - age_to) / 5)) ^ 2
}
eta <- function(age_from, age_to) {
1 + f(age_from) + f(age_to) + g(age_from, age_to) + h(age_from, age_to)
}
data <- expand.grid(
age_from = 1:90,
age_to = 1:90
) %>%
mutate(
# the fake log number of contacts, as if we had the model:
# smooth(x, y) = f(x) + f(y)
term_eta = eta(age_from, age_to),
participants = 500,
# fake data (ignore number of participants and population offsets for now)
response = rpois(n(), exp(term_eta)) * participants
)
# fit with various symmetric smooth types
m <- gam(response ~
s(I(abs(age_from - age_to))) +
s(I(abs(age_from - age_to)^2)) +
s(I(abs(age_from * age_to))) +
s(I(abs(age_from + age_to))) +
s(I(pmax(age_from, age_to))) +
s(I(pmin(age_from, age_to))),
family = poisson,
offset = log(participants),
data = data)
# add terms back into the data frame
data <- data %>%
mutate(
term_intercept = m$coefficients[1],
term_offdiag = predict(m, ., type = "terms", terms = "s(I(abs(age_from - age_to)))"),
term_offdiag_2 = predict(m, ., type = "terms", terms = "s(I(abs(age_from - age_to)^2))"),
term_diag_prod = predict(m, ., type = "terms", terms = "s(I(abs(age_from * age_to)))"),
term_diag_sum = predict(m, ., type = "terms", terms = "s(I(abs(age_from + age_to)))"),
term_max = predict(m, ., type = "terms", terms = "s(I(pmax(age_from, age_to)))"),
term_min = predict(m, ., type = "terms", terms = "s(I(pmin(age_from, age_to)))"),
term_fitted = term_intercept +
term_offdiag +
term_offdiag_2 +
term_diag_prod +
term_diag_sum +
term_max +
term_min
)
# pivot_longer for plotting
data_terms <- data %>%
pivot_longer(
starts_with("term"),
names_to = "term",
values_to = "value",
names_prefix = "term_"
)
# putting eta and fitted in with the terms is a bit inelegant, but I couldn't
# think of a nicer pattern that wasn't a lot of code.
# plot the truth and fitted model
data_terms %>%
filter(term %in% c("eta", "fitted")) %>%
ggplot(
aes(
x = age_from,
y = age_to,
group = term,
fill = exp(value)
)
) +
facet_wrap(~term) +
geom_tile() +
scale_fill_viridis_c() +
theme_minimal() +
coord_fixed()
# plot the parts of the model
data_terms %>%
filter(!(term %in% c("eta", "fitted"))) %>%
ggplot(
aes(
x = age_from,
y = age_to,
group = term,
fill = value
)
) +
facet_wrap(~term) +
geom_tile() +
scale_fill_viridis_c() +
theme_minimal() +
coord_fixed()
# plot the 1D smooths
par(mfrow = c(2, 3))
plot(m)
par(mfrow = c(1, 1))
Created on 2024-05-15 with reprex v2.1.0
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.4.0 (2024-04-24)
#> os macOS Sonoma 14.3.1
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Australia/Brisbane
#> date 2024-05-15
#> pandoc 3.1.13 @ /opt/homebrew/bin/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> cli 3.6.2 2023-12-11 [1] CRAN (R 4.4.0)
#> colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.4.0)
#> curl 5.2.1 2024-03-01 [1] CRAN (R 4.4.0)
#> digest 0.6.35 2024-03-11 [1] CRAN (R 4.4.0)
#> dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.4.0)
#> evaluate 0.23 2023-11-01 [1] CRAN (R 4.4.0)
#> fansi 1.0.6 2023-12-08 [1] CRAN (R 4.4.0)
#> farver 2.1.1 2022-07-06 [1] CRAN (R 4.4.0)
#> fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.4.0)
#> forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.4.0)
#> fs 1.6.4 2024-04-25 [1] CRAN (R 4.4.0)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.4.0)
#> ggplot2 * 3.5.1 2024-04-23 [1] CRAN (R 4.4.0)
#> glue 1.7.0 2024-01-09 [1] CRAN (R 4.4.0)
#> gtable 0.3.5 2024-04-22 [1] CRAN (R 4.4.0)
#> highr 0.10 2022-12-22 [1] CRAN (R 4.4.0)
#> hms 1.1.3 2023-03-21 [1] CRAN (R 4.4.0)
#> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
#> knitr 1.46 2024-04-06 [1] CRAN (R 4.4.0)
#> labeling 0.4.3 2023-08-29 [1] CRAN (R 4.4.0)
#> lattice 0.22-6 2024-03-20 [2] CRAN (R 4.4.0)
#> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.4.0)
#> lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.4.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.4.0)
#> Matrix 1.7-0 2024-03-22 [2] CRAN (R 4.4.0)
#> mgcv * 1.9-1 2023-12-21 [2] CRAN (R 4.4.0)
#> munsell 0.5.1 2024-04-01 [1] CRAN (R 4.4.0)
#> nlme * 3.1-164 2023-11-27 [2] CRAN (R 4.4.0)
#> pillar 1.9.0 2023-03-22 [1] CRAN (R 4.4.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.4.0)
#> purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.4.0)
#> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.4.0)
#> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.4.0)
#> R.oo 1.26.0 2024-01-24 [1] CRAN (R 4.4.0)
#> R.utils 2.12.3 2023-11-18 [1] CRAN (R 4.4.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.4.0)
#> readr * 2.1.5 2024-01-10 [1] CRAN (R 4.4.0)
#> reprex 2.1.0 2024-01-11 [1] CRAN (R 4.4.0)
#> rlang 1.1.3 2024-01-10 [1] CRAN (R 4.4.0)
#> rmarkdown 2.26 2024-03-05 [1] CRAN (R 4.4.0)
#> rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.4.0)
#> scales 1.3.0 2023-11-28 [1] CRAN (R 4.4.0)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.4.0)
#> stringi 1.8.4 2024-05-06 [1] CRAN (R 4.4.0)
#> stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.4.0)
#> styler 1.10.3 2024-04-07 [1] CRAN (R 4.4.0)
#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.4.0)
#> tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.4.0)
#> tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.4.0)
#> tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.4.0)
#> timechange 0.3.0 2024-01-18 [1] CRAN (R 4.4.0)
#> tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.4.0)
#> utf8 1.2.4 2023-10-22 [1] CRAN (R 4.4.0)
#> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.4.0)
#> viridisLite 0.4.2 2023-05-02 [1] CRAN (R 4.4.0)
#> withr 3.0.0 2024-01-16 [1] CRAN (R 4.4.0)
#> xfun 0.43 2024-03-25 [1] CRAN (R 4.4.0)
#> xml2 1.3.6 2023-12-04 [1] CRAN (R 4.4.0)
#> yaml 2.3.8 2023-12-11 [1] CRAN (R 4.4.0)
#>
#> [1] /Users/nick/Library/R/arm64/4.4/library
#> [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────