Skip to content

Instantly share code, notes, and snippets.

@gweissman
Last active September 11, 2018 15:04
Show Gist options
  • Save gweissman/8630754df8bdb3c0a1a39383cac8ba73 to your computer and use it in GitHub Desktop.
Save gweissman/8630754df8bdb3c0a1a39383cac8ba73 to your computer and use it in GitHub Desktop.
Network and diffusion process imulations
---
title: 'Aim 2: Simulated prelim data for Roybal Proposal'
author: "Gary E. Weissman, MD, MSHP"
date: "8/29/2018"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(igraph)
library(ggplot2)
library(ggnetwork)
library(patchwork)
library(netdiffuseR)
library(data.table)
set.seed(30)
```
## Background
## Create scale-free networks for simulation
```{r make_graphs}
N <- 250
power_law_vec <- c(1.25, 1.55, 1.75, 2.0)
sfn_list_raw <- lapply(power_law_vec,
function(p) sample_pa(N, p, directed = FALSE))
# ------------- calculate importance measures
# pagerank measures
sfn_list_attr <- lapply(sfn_list_raw,
function(g) {V(g)$pr <- igraph::page_rank(g)$vector; g})
# betweenness centrality
sfn_list_attr <- lapply(sfn_list_attr,
function(g) {V(g)$bc <- igraph::betweenness(g); g})
```
## Visualize network structures and importance properties
```{r view_graphs}
# ------------- custom plot function
# takes an igraph object and returns a ggplot object
plot_graph <- function(g, node_color = NULL) {
p <- ggplot(ggnetwork(g,
layout = "fruchtermanreingold",
arrow.gap = 0.01),
aes(x = x, y = y, xend = xend, yend = yend)) +
geom_nodes(size = 2, color = 'black', fill = 'white') +
geom_edges(size = 0.2,
arrow = arrow(length = unit(2, "pt"))) +
scale_color_continuous(low = 'lightgray', high = 'red') +
theme_blank() +
ggtitle(paste0('α = ',g$power))
if (is.null(node_color)) {
p <- p + geom_nodes(size = 2)
} else if (node_color == 'pr') {
p <- p + geom_nodes(size = 2, aes(color = pr))
} else if (node_color == 'bc') {
p <- p + geom_nodes(size = 2, aes(color = bc))
}
return(p)
}
# ------------- make individual graph plots
sfn_plot_list_plain <- lapply(sfn_list_attr, plot_graph)
sfn_plot_list_pr <- lapply(sfn_list_attr, plot_graph, node_color = 'pr')
sfn_plot_list_bc <- lapply(sfn_list_attr, plot_graph, node_color = 'bc')
# ------------- make group graph plots
comp_plot_plain <- Reduce('+', sfn_plot_list_plain)
ggsave('p_comp_plain.png', comp_plot_plain)
comp_plot_pr <- Reduce('+', sfn_plot_list_pr)
ggsave('p_comp_pr.png', comp_plot_pr)
comp_plot_bc <- Reduce('+', sfn_plot_list_bc)
ggsave('p_comp_bc.png', comp_plot_bc)
```
## Simulate diffusion process over networks
```{r sim_diff}
# ------------- custom diffusion function
run_diffusion_sim <- function(g, threshold, p_adopt = 0.05,
time_steps = 6, strategy = 'random',
n_sims = 1000, cpus = 6) {
seed_nodes <- NULL
n_seeds <- round(length(V(g)) * p_adopt)
if (strategy == 'random') {
seed_nodes <- sample(length(V(g)), n_seeds)
}
else if (strategy == 'central') {
vlist <- data.table(vid = V(g), bc = V(g)$bc)[order(-bc)]
seed_nodes <- vlist[1:n_seeds]$vid
}
diff_object <- rdiffnet_multiple(R = n_sims,
statistic = function(x) colSums(x$cumadopt),
t = time_steps,
seed.graph = as_adjacency_matrix(g),
threshold.dist = threshold,
seed.nodes = seed_nodes,
ncpus = cpus)
# return the cumulative adopters for each time step
results <- as.data.table(t(diff_object))
setnames(results, paste0('t_',1:time_steps))
results[, strategy := strategy]
results[, alpha := g$power]
results[, threshold := threshold]
return(results)
}
# ------------- run simulations
threshold_list <- c(0.1,0.25, 0.5)
random_strategy_data <- lapply(threshold_list, function(thr) {
lapply(sfn_list_attr, function(g) {
run_diffusion_sim(g, thr)
})
})
targeted_strategy_data <- lapply(threshold_list, function(thr) {
lapply(sfn_list_attr, function(g) {
run_diffusion_sim(g, thr, strategy = 'central')
})
})
# ------------- collect data
random_strategy_all <- rbindlist(lapply(random_strategy_data, rbindlist))
targeted_strategy_all <- rbindlist(lapply(targeted_strategy_data, rbindlist))
```
## Visualize differences in diffusion patterns over networks
```{r net_viz}
# ------------- organize data
sim_data <- rbind(random_strategy_all, targeted_strategy_all)
sim_data_m <- melt(sim_data, id.vars = c('threshold','alpha','strategy'),
variable.factor = FALSE)
sim_data_m[, time := as.numeric(substr(variable,3,nchar(variable)))]
sim_data_m[, `influence threshold` := as.factor(threshold)]
# ------------- get summary statistics
sim_data_summ <- sim_data_m[, .(q25 = quantile(value, 0.25),
q50 = quantile(value, 0.50),
q75 = quantile(value, 0.75)),
by = .(`influence threshold`, alpha, strategy, time)]
# ------------- plot
p_res <- ggplot(sim_data_summ, aes(time, q50,
linetype = strategy,
color = `influence threshold`)) +
geom_line(size = 0.3) +
geom_errorbar(aes(ymin = q25, ymax = q75), width = 0.1) +
facet_wrap(~alpha, ncol = 2) +
theme_bw() +
scale_y_continuous('adopters') +
scale_x_continuous('months') +
theme(legend.position = 'bottom')
ggsave('p_res.png',p_res)
```
## Test for differences between strategies
```{r test_diff}
# get p-values for each comparison of random vs central strategy
# adjust for multiple comparisons (bonferroni)
sim_data_m[time > 1, t.test(.SD[strategy == 'random']$value,
.SD[strategy == 'central']$value),
by = .(alpha, threshold, time)]$p.value * 120
```
## Identify communities
```{r comm}
op <- par()
png('p_comm.png')
par(mfrow=c(2,2))
lapply(sfn_list_attr,
function(g) {
plot(
edge.betweenness.community(g),
g,
vertex.label = NA,
vertex.size = 1.9,
main = paste0('α = ', g$power)
)
})
dev.off()
par(op)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment