Last active
September 20, 2021 20:24
-
-
Save alexpghayes/6c83464958d567b4231d7ee1ec8e2486 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(Matrix) | |
library(tidyverse) | |
# need these two packages that are not on CRAN | |
# if something goes wrong, let me know, i wrote them and will fix | |
# to install: | |
# | |
# > install.packages("pak") | |
# > pak::pkg_install("RoheLab/fastRG") | |
# > pak::pkg_install("RoheLab/vsp") | |
library(fastRG) | |
library(vsp) | |
##### part 1: simulation the underlying network -------------------------------- | |
n <- 500 # number of nodes in the network | |
# network parameters | |
k <- 5 | |
B <- matrix(0.1, nrow = k, ncol = k) | |
diag(B) <- 1 | |
B | |
theta <- round(stats::rlnorm(n, 2)) | |
pi <- c(1, 2, 4, 1, 1) | |
pop_dcsbm <- dcsbm( | |
theta = theta, | |
B = B, | |
pi = pi, | |
expected_density = 0.2 | |
) | |
pop_dcsbm # population properties of the network | |
# A is the adjacency matrix of the underlying network, where contagion | |
# spreads along edges in this network | |
A <- sample_sparse(pop_dcsbm, poisson_edges = FALSE, allow_self_loops = FALSE) | |
A | |
##### part 2: simulate contagion on the network according to shalizi's model --- | |
num_periods <- 5 | |
Y <- matrix(data = 0, ncol = num_periods, nrow = n) | |
Y[, 1] <- rbinom(n = n, size = 1, prob = 0.1) | |
Y | |
intercept <- 0.01 | |
alpha <- 0.05 | |
beta <- 0.1 | |
gamma <- c(0.15, 0, 0, 0.05, 0.1) | |
C <- eigs_sym(pop_dcsbm)$vectors | |
for (t in 2:num_periods) { | |
mu_t <- intercept + alpha * Y[, t - 1] + beta * A %*% Y[, t - 1] + C %*% gamma | |
y_t <- mu_t + rnorm(n, sd = 0.1) | |
Y[, t] <- drop(y_t) | |
} | |
# Y[i , t] is the infection level of node i at time t | |
Y | |
##### part 3: try to recover the parameters of the simulation ------------------ | |
# most of this is data munging to get our simulated data into a nice shape | |
# once it's into a nice shape, all we need is linear regression | |
# (a) sanity check the case where we know the latent network parameters C | |
colnames(C) <- paste0("c", 1:k) | |
colnames(Y) <- paste0("y_", 1:k) | |
AY <- A %*% Y | |
colnames(AY) <- paste0("Ay_", 1:k) | |
X <- as.matrix(cbind(Y, AY, C)) | |
# this is the data, just not quite in a format we can do linear regression | |
# on yet. we want the coefficient for a lagged Ay term, which measures | |
# how much infections spread on the network | |
X | |
fit_known_c <- as_tibble(X) %>% | |
mutate(node = row_number()) %>% | |
pivot_longer( | |
cols = contains("y"), | |
names_to = c("feature", "time"), | |
names_sep = "_" | |
) %>% | |
pivot_wider( | |
names_from = "feature", | |
values_from = "value" | |
) %>% | |
arrange(node, time) %>% | |
group_by(node) %>% | |
mutate( | |
y_lag = dplyr::lag(y), | |
Ay_lag = dplyr::lag(Ay) | |
) %>% | |
ungroup(node) %>% | |
select(node, time, y, y_lag, Ay_lag, contains("c")) %>% | |
select(y, y_lag, Ay_lag, contains("c")) %>% | |
lm(y ~ ., data = .) | |
summary(fit_known_c) | |
# (b) now the case where we have to estimate the latent network parameters | |
fa <- vsp(A, rank = k) # latent parameter estimation | |
fa | |
C_hat <- fa$u | |
colnames(C_hat) <- paste0("c", 1:k) | |
# copy-pasta of data munging now using estimates instead of population | |
# parameters | |
X <- as.matrix(cbind(Y, AY, C_hat)) | |
fit <- as_tibble(X) %>% | |
mutate(node = row_number()) %>% | |
pivot_longer( | |
cols = contains("y"), | |
names_to = c("feature", "time"), | |
names_sep = "_" | |
) %>% | |
pivot_wider( | |
names_from = "feature", | |
values_from = "value" | |
) %>% | |
arrange(node, time) %>% | |
group_by(node) %>% | |
mutate( | |
y_lag = dplyr::lag(y), | |
Ay_lag = dplyr::lag(Ay) | |
) %>% | |
ungroup(node) %>% | |
select(node, time, y, y_lag, Ay_lag, contains("c")) %>% | |
select(y, y_lag, Ay_lag, contains("c")) %>% | |
lm(y ~ ., data = .) | |
summary(fit) | |
# plot results | |
broom::tidy(fit_known_c, conf.int = TRUE) %>% | |
mutate(true_value = c(intercept, alpha, beta, gamma)) %>% | |
filter(!str_detect(term, "c[0-9]")) %>% | |
ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) + | |
geom_pointrange(alpha = 0.5) + | |
geom_point(aes(y = true_value), shape = 4, size = 4, color = "darkgreen") + | |
coord_flip() + | |
theme_classic() + | |
labs( | |
title = "95 percent confidence intervals for estimated parameters when C is known", | |
x = "Parameter", | |
y = "", | |
caption = "True parameter value at the green X" | |
) | |
broom::tidy(fit, conf.int = TRUE) %>% | |
mutate(true_value = c(intercept, alpha, beta, gamma)) %>% | |
filter(!str_detect(term, "c[0-9]")) %>% | |
ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) + | |
geom_pointrange(alpha = 0.5) + | |
geom_point(aes(y = true_value), shape = 4, size = 4, color = "darkgreen") + | |
coord_flip() + | |
theme_classic() + | |
labs( | |
title = "95 percent confidence intervals for estimated parameters when C is estimated", | |
x = "Parameter", | |
y = "", | |
caption = "True parameter value at the green X" | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment