Skip to content

Instantly share code, notes, and snippets.

@alexpghayes
Last active September 20, 2021 20:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexpghayes/6c83464958d567b4231d7ee1ec8e2486 to your computer and use it in GitHub Desktop.
Save alexpghayes/6c83464958d567b4231d7ee1ec8e2486 to your computer and use it in GitHub Desktop.
library(Matrix)
library(tidyverse)
# need these two packages that are not on CRAN
# if something goes wrong, let me know, i wrote them and will fix
# to install:
#
# > install.packages("pak")
# > pak::pkg_install("RoheLab/fastRG")
# > pak::pkg_install("RoheLab/vsp")
library(fastRG)
library(vsp)
##### part 1: simulation the underlying network --------------------------------
n <- 500 # number of nodes in the network
# network parameters
k <- 5
B <- matrix(0.1, nrow = k, ncol = k)
diag(B) <- 1
B
theta <- round(stats::rlnorm(n, 2))
pi <- c(1, 2, 4, 1, 1)
pop_dcsbm <- dcsbm(
theta = theta,
B = B,
pi = pi,
expected_density = 0.2
)
pop_dcsbm # population properties of the network
# A is the adjacency matrix of the underlying network, where contagion
# spreads along edges in this network
A <- sample_sparse(pop_dcsbm, poisson_edges = FALSE, allow_self_loops = FALSE)
A
##### part 2: simulate contagion on the network according to shalizi's model ---
num_periods <- 5
Y <- matrix(data = 0, ncol = num_periods, nrow = n)
Y[, 1] <- rbinom(n = n, size = 1, prob = 0.1)
Y
intercept <- 0.01
alpha <- 0.05
beta <- 0.1
gamma <- c(0.15, 0, 0, 0.05, 0.1)
C <- eigs_sym(pop_dcsbm)$vectors
for (t in 2:num_periods) {
mu_t <- intercept + alpha * Y[, t - 1] + beta * A %*% Y[, t - 1] + C %*% gamma
y_t <- mu_t + rnorm(n, sd = 0.1)
Y[, t] <- drop(y_t)
}
# Y[i , t] is the infection level of node i at time t
Y
##### part 3: try to recover the parameters of the simulation ------------------
# most of this is data munging to get our simulated data into a nice shape
# once it's into a nice shape, all we need is linear regression
# (a) sanity check the case where we know the latent network parameters C
colnames(C) <- paste0("c", 1:k)
colnames(Y) <- paste0("y_", 1:k)
AY <- A %*% Y
colnames(AY) <- paste0("Ay_", 1:k)
X <- as.matrix(cbind(Y, AY, C))
# this is the data, just not quite in a format we can do linear regression
# on yet. we want the coefficient for a lagged Ay term, which measures
# how much infections spread on the network
X
fit_known_c <- as_tibble(X) %>%
mutate(node = row_number()) %>%
pivot_longer(
cols = contains("y"),
names_to = c("feature", "time"),
names_sep = "_"
) %>%
pivot_wider(
names_from = "feature",
values_from = "value"
) %>%
arrange(node, time) %>%
group_by(node) %>%
mutate(
y_lag = dplyr::lag(y),
Ay_lag = dplyr::lag(Ay)
) %>%
ungroup(node) %>%
select(node, time, y, y_lag, Ay_lag, contains("c")) %>%
select(y, y_lag, Ay_lag, contains("c")) %>%
lm(y ~ ., data = .)
summary(fit_known_c)
# (b) now the case where we have to estimate the latent network parameters
fa <- vsp(A, rank = k) # latent parameter estimation
fa
C_hat <- fa$u
colnames(C_hat) <- paste0("c", 1:k)
# copy-pasta of data munging now using estimates instead of population
# parameters
X <- as.matrix(cbind(Y, AY, C_hat))
fit <- as_tibble(X) %>%
mutate(node = row_number()) %>%
pivot_longer(
cols = contains("y"),
names_to = c("feature", "time"),
names_sep = "_"
) %>%
pivot_wider(
names_from = "feature",
values_from = "value"
) %>%
arrange(node, time) %>%
group_by(node) %>%
mutate(
y_lag = dplyr::lag(y),
Ay_lag = dplyr::lag(Ay)
) %>%
ungroup(node) %>%
select(node, time, y, y_lag, Ay_lag, contains("c")) %>%
select(y, y_lag, Ay_lag, contains("c")) %>%
lm(y ~ ., data = .)
summary(fit)
# plot results
broom::tidy(fit_known_c, conf.int = TRUE) %>%
mutate(true_value = c(intercept, alpha, beta, gamma)) %>%
filter(!str_detect(term, "c[0-9]")) %>%
ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
geom_pointrange(alpha = 0.5) +
geom_point(aes(y = true_value), shape = 4, size = 4, color = "darkgreen") +
coord_flip() +
theme_classic() +
labs(
title = "95 percent confidence intervals for estimated parameters when C is known",
x = "Parameter",
y = "",
caption = "True parameter value at the green X"
)
broom::tidy(fit, conf.int = TRUE) %>%
mutate(true_value = c(intercept, alpha, beta, gamma)) %>%
filter(!str_detect(term, "c[0-9]")) %>%
ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
geom_pointrange(alpha = 0.5) +
geom_point(aes(y = true_value), shape = 4, size = 4, color = "darkgreen") +
coord_flip() +
theme_classic() +
labs(
title = "95 percent confidence intervals for estimated parameters when C is estimated",
x = "Parameter",
y = "",
caption = "True parameter value at the green X"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment