gweissman/net_diff_sim.Rmd

## net_diff_sim.Rmd
---
title: 'Aim 2: Simulated prelim data for Roybal Proposal'
author: "Gary E. Weissman, MD, MSHP"
date: "8/29/2018"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(igraph)
library(ggplot2)
library(ggnetwork)
library(patchwork)
library(netdiffuseR)
library(data.table)
set.seed(30)
```

## Background

## Create scale-free networks for simulation

```{r make_graphs}
N <- 250
power_law_vec <- c(1.25, 1.55, 1.75, 2.0)
sfn_list_raw <- lapply(power_law_vec,
                   function(p) sample_pa(N, p, directed = FALSE))

# ------------- calculate importance measures
# pagerank measures
sfn_list_attr <- lapply(sfn_list_raw,
                        function(g) {V(g)$pr <- igraph::page_rank(g)$vector; g})
# betweenness centrality
sfn_list_attr <- lapply(sfn_list_attr,
                        function(g) {V(g)$bc <- igraph::betweenness(g); g})
```

## Visualize network structures and importance properties

```{r view_graphs}
# ------------- custom plot function
# takes an igraph object and returns a ggplot object
plot_graph <- function(g, node_color = NULL) {
  p <- ggplot(ggnetwork(g,
                   layout = "fruchtermanreingold",
                   arrow.gap = 0.01),
         aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_nodes(size = 2, color = 'black', fill = 'white') +
    geom_edges(size = 0.2,
               arrow = arrow(length = unit(2, "pt"))) +
    scale_color_continuous(low = 'lightgray', high = 'red') +
    theme_blank() +
    ggtitle(paste0('α = ',g$power))

  if (is.null(node_color)) {
    p <- p + geom_nodes(size = 2)
  } else if (node_color == 'pr') {
    p <- p + geom_nodes(size = 2, aes(color = pr))
  } else if (node_color == 'bc') {
    p <- p + geom_nodes(size = 2, aes(color = bc))
  }
  return(p)
}
# ------------- make individual graph plots
sfn_plot_list_plain <- lapply(sfn_list_attr, plot_graph)
sfn_plot_list_pr <- lapply(sfn_list_attr, plot_graph, node_color = 'pr')
sfn_plot_list_bc <- lapply(sfn_list_attr, plot_graph, node_color = 'bc')

# ------------- make group graph plots
comp_plot_plain <- Reduce('+', sfn_plot_list_plain)
ggsave('p_comp_plain.png', comp_plot_plain)
comp_plot_pr <- Reduce('+', sfn_plot_list_pr)
ggsave('p_comp_pr.png', comp_plot_pr)
comp_plot_bc <- Reduce('+', sfn_plot_list_bc)
ggsave('p_comp_bc.png', comp_plot_bc)
```

## Simulate diffusion process over networks

```{r sim_diff}
# ------------- custom diffusion function
run_diffusion_sim <- function(g, threshold, p_adopt = 0.05,
                              time_steps = 6, strategy = 'random',
                              n_sims = 1000, cpus = 6) {
  seed_nodes <- NULL
  n_seeds <- round(length(V(g)) * p_adopt)
  if (strategy == 'random') {
    seed_nodes <- sample(length(V(g)), n_seeds)
  }
  else if (strategy == 'central') {
    vlist <- data.table(vid = V(g), bc = V(g)$bc)[order(-bc)]
    seed_nodes <- vlist[1:n_seeds]$vid
  }
  diff_object <- rdiffnet_multiple(R = n_sims,
                                   statistic = function(x) colSums(x$cumadopt),
                                   t = time_steps,
                          seed.graph = as_adjacency_matrix(g),
                          threshold.dist = threshold,
                          seed.nodes = seed_nodes,
                          ncpus = cpus)
  # return the cumulative adopters for each time step
  results <- as.data.table(t(diff_object))
  setnames(results, paste0('t_',1:time_steps))
  results[, strategy := strategy]
  results[, alpha := g$power]
  results[, threshold := threshold]
  return(results)
}

# ------------- run simulations
threshold_list <- c(0.1,0.25, 0.5)
random_strategy_data <- lapply(threshold_list, function(thr) {
  lapply(sfn_list_attr, function(g) {
    run_diffusion_sim(g, thr)
  })
})
targeted_strategy_data <- lapply(threshold_list, function(thr) {
  lapply(sfn_list_attr, function(g) {
    run_diffusion_sim(g, thr, strategy = 'central')
  })
})

# ------------- collect data
random_strategy_all <- rbindlist(lapply(random_strategy_data, rbindlist))
targeted_strategy_all <- rbindlist(lapply(targeted_strategy_data, rbindlist))
```

## Visualize differences in diffusion patterns over networks

```{r net_viz}
# ------------- organize data
sim_data <- rbind(random_strategy_all, targeted_strategy_all)
sim_data_m <- melt(sim_data, id.vars = c('threshold','alpha','strategy'),
                   variable.factor = FALSE)
sim_data_m[, time := as.numeric(substr(variable,3,nchar(variable)))]
sim_data_m[, `influence threshold` := as.factor(threshold)]
# ------------- get summary statistics
sim_data_summ <- sim_data_m[, .(q25 = quantile(value, 0.25),
               q50 = quantile(value, 0.50),
               q75 = quantile(value, 0.75)),
           by = .(`influence threshold`, alpha, strategy, time)]
# ------------- plot
p_res <- ggplot(sim_data_summ, aes(time, q50,
                       linetype = strategy,
                       color = `influence threshold`)) +
  geom_line(size = 0.3) +
  geom_errorbar(aes(ymin = q25, ymax = q75), width = 0.1) +
  facet_wrap(~alpha, ncol = 2) +
  theme_bw() +
  scale_y_continuous('adopters') +
  scale_x_continuous('months') +
  theme(legend.position = 'bottom')
ggsave('p_res.png',p_res)
```

## Test for differences between strategies

```{r test_diff}
# get p-values for each comparison of random vs central strategy
# adjust for multiple comparisons (bonferroni)
sim_data_m[time > 1, t.test(.SD[strategy == 'random']$value,
                            .SD[strategy == 'central']$value),
                            by = .(alpha, threshold, time)]$p.value * 120
```

## Identify communities

```{r comm}
op <- par()
png('p_comm.png')
par(mfrow=c(2,2))
lapply(sfn_list_attr,
       function(g) {
       plot(
       edge.betweenness.community(g),
       g,
       vertex.label = NA,
       vertex.size = 1.9,
       main = paste0('α = ', g$power)
       )
       })
dev.off()
par(op)
```
	---
	title: 'Aim 2: Simulated prelim data for Roybal Proposal'
	author: "Gary E. Weissman, MD, MSHP"
	date: "8/29/2018"
	output: html_document
	---

	```{r setup, include=FALSE}
	knitr::opts_chunk$set(echo = TRUE)
	library(igraph)
	library(ggplot2)
	library(ggnetwork)
	library(patchwork)
	library(netdiffuseR)
	library(data.table)
	set.seed(30)
	```

	## Background

	## Create scale-free networks for simulation

	```{r make_graphs}
	N <- 250
	power_law_vec <- c(1.25, 1.55, 1.75, 2.0)
	sfn_list_raw <- lapply(power_law_vec,
	function(p) sample_pa(N, p, directed = FALSE))

	# ------------- calculate importance measures
	# pagerank measures
	sfn_list_attr <- lapply(sfn_list_raw,
	function(g) {V(g)$pr <- igraph::page_rank(g)$vector; g})
	# betweenness centrality
	sfn_list_attr <- lapply(sfn_list_attr,
	function(g) {V(g)$bc <- igraph::betweenness(g); g})
	```

	## Visualize network structures and importance properties

	```{r view_graphs}
	# ------------- custom plot function
	# takes an igraph object and returns a ggplot object
	plot_graph <- function(g, node_color = NULL) {
	p <- ggplot(ggnetwork(g,
	layout = "fruchtermanreingold",
	arrow.gap = 0.01),
	aes(x = x, y = y, xend = xend, yend = yend)) +
	geom_nodes(size = 2, color = 'black', fill = 'white') +
	geom_edges(size = 0.2,
	arrow = arrow(length = unit(2, "pt"))) +
	scale_color_continuous(low = 'lightgray', high = 'red') +
	theme_blank() +
	ggtitle(paste0('α = ',g$power))

	if (is.null(node_color)) {
	p <- p + geom_nodes(size = 2)
	} else if (node_color == 'pr') {
	p <- p + geom_nodes(size = 2, aes(color = pr))
	} else if (node_color == 'bc') {
	p <- p + geom_nodes(size = 2, aes(color = bc))
	}
	return(p)
	}
	# ------------- make individual graph plots
	sfn_plot_list_plain <- lapply(sfn_list_attr, plot_graph)
	sfn_plot_list_pr <- lapply(sfn_list_attr, plot_graph, node_color = 'pr')
	sfn_plot_list_bc <- lapply(sfn_list_attr, plot_graph, node_color = 'bc')

	# ------------- make group graph plots
	comp_plot_plain <- Reduce('+', sfn_plot_list_plain)
	ggsave('p_comp_plain.png', comp_plot_plain)
	comp_plot_pr <- Reduce('+', sfn_plot_list_pr)
	ggsave('p_comp_pr.png', comp_plot_pr)
	comp_plot_bc <- Reduce('+', sfn_plot_list_bc)
	ggsave('p_comp_bc.png', comp_plot_bc)
	```

	## Simulate diffusion process over networks

	```{r sim_diff}
	# ------------- custom diffusion function
	run_diffusion_sim <- function(g, threshold, p_adopt = 0.05,
	time_steps = 6, strategy = 'random',
	n_sims = 1000, cpus = 6) {
	seed_nodes <- NULL
	n_seeds <- round(length(V(g)) * p_adopt)
	if (strategy == 'random') {
	seed_nodes <- sample(length(V(g)), n_seeds)
	}
	else if (strategy == 'central') {
	vlist <- data.table(vid = V(g), bc = V(g)$bc)[order(-bc)]
	seed_nodes <- vlist[1:n_seeds]$vid
	}
	diff_object <- rdiffnet_multiple(R = n_sims,
	statistic = function(x) colSums(x$cumadopt),
	t = time_steps,
	seed.graph = as_adjacency_matrix(g),
	threshold.dist = threshold,
	seed.nodes = seed_nodes,
	ncpus = cpus)
	# return the cumulative adopters for each time step
	results <- as.data.table(t(diff_object))
	setnames(results, paste0('t_',1:time_steps))
	results[, strategy := strategy]
	results[, alpha := g$power]
	results[, threshold := threshold]
	return(results)
	}

	# ------------- run simulations
	threshold_list <- c(0.1,0.25, 0.5)
	random_strategy_data <- lapply(threshold_list, function(thr) {
	lapply(sfn_list_attr, function(g) {
	run_diffusion_sim(g, thr)
	})
	})
	targeted_strategy_data <- lapply(threshold_list, function(thr) {
	lapply(sfn_list_attr, function(g) {
	run_diffusion_sim(g, thr, strategy = 'central')
	})
	})

	# ------------- collect data
	random_strategy_all <- rbindlist(lapply(random_strategy_data, rbindlist))
	targeted_strategy_all <- rbindlist(lapply(targeted_strategy_data, rbindlist))
	```

	## Visualize differences in diffusion patterns over networks

	```{r net_viz}
	# ------------- organize data
	sim_data <- rbind(random_strategy_all, targeted_strategy_all)
	sim_data_m <- melt(sim_data, id.vars = c('threshold','alpha','strategy'),
	variable.factor = FALSE)
	sim_data_m[, time := as.numeric(substr(variable,3,nchar(variable)))]
	sim_data_m[, `influence threshold` := as.factor(threshold)]
	# ------------- get summary statistics
	sim_data_summ <- sim_data_m[, .(q25 = quantile(value, 0.25),
	q50 = quantile(value, 0.50),
	q75 = quantile(value, 0.75)),
	by = .(`influence threshold`, alpha, strategy, time)]
	# ------------- plot
	p_res <- ggplot(sim_data_summ, aes(time, q50,
	linetype = strategy,
	color = `influence threshold`)) +
	geom_line(size = 0.3) +
	geom_errorbar(aes(ymin = q25, ymax = q75), width = 0.1) +
	facet_wrap(~alpha, ncol = 2) +
	theme_bw() +
	scale_y_continuous('adopters') +
	scale_x_continuous('months') +
	theme(legend.position = 'bottom')
	ggsave('p_res.png',p_res)
	```

	## Test for differences between strategies

	```{r test_diff}
	# get p-values for each comparison of random vs central strategy
	# adjust for multiple comparisons (bonferroni)
	sim_data_m[time > 1, t.test(.SD[strategy == 'random']$value,
	.SD[strategy == 'central']$value),
	by = .(alpha, threshold, time)]$p.value * 120
	```

	## Identify communities

	```{r comm}
	op <- par()
	png('p_comm.png')
	par(mfrow=c(2,2))
	lapply(sfn_list_attr,
	function(g) {
	plot(
	edge.betweenness.community(g),
	g,
	vertex.label = NA,
	vertex.size = 1.9,
	main = paste0('α = ', g$power)
	)
	})
	dev.off()
	par(op)
	```