Created
July 18, 2017 20:19
-
-
Save mathieubray/8777a03c12cc5c09c1a1b955b482d24c to your computer and use it in GitHub Desktop.
Worked Example for Kidney Paired-Donation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) # Take that, base R | |
library(purrr) # 'map' functions | |
library(igraph) # R graph framework | |
library(ggplot2) # For plotting | |
library(ggraph) # Plotting graphs | |
library(data.table) # 'rbindlist' to combine list of data frames into one data frame | |
library(ompr) # Mixed integer programming | |
library(ompr.roi) | |
library(ROI) # R optimization interface | |
library(ROI.plugin.glpk) # Will use solver 'glpk' | |
### KPD Example ### | |
set.seed(901) # Arbitrary seed, seemed to generate a decent network for this demo | |
number_of_nodes <- 20 | |
kpd <- sample_gnp(number_of_nodes, 0.1, directed=TRUE) # Here we generate our KPD as a Erdos-Renyi random graph | |
# Plot graph | |
ggraph(kpd,layout='linear',circular=TRUE) + | |
geom_edge_link(aes(start_cap=circle(5,"mm"), | |
end_cap=circle(5,"mm")), | |
show.legend=FALSE, | |
arrow=arrow(angle=20, | |
length=unit(0.1,"inches"), | |
type="closed")) + | |
geom_node_point(size=12,color="red",alpha=0.6) + | |
geom_node_text(aes(label=1:20),size=5,color="black") + | |
theme_graph() | |
# Retrieve edges in the network | |
edges <- kpd %>% get.edgelist %>% data.frame | |
head(edges) | |
# Depth-First Search for Cycles in a Network | |
get_cycles <- function(nodes, edgelist, max_cycle_size){ | |
cycle_list <- list() # Collect found cycles | |
cycles <- 0 # Count cycles | |
# Does edge exist in network from 'from' node to 'to' node? | |
incidence <- function(from,to){ | |
return(edgelist %>% filter(X1==from,X2==to) %>% nrow > 0) | |
} | |
# Retrieve first unvisited child of 'current' node (after 'from' node) | |
# Returns -1 if no child found | |
get_child <- function(from, current, visited){ | |
if (from != nodes){ | |
for (j in (from + 1):nodes){ | |
if (incidence(current,j) & !visited[j]){ | |
return(j) | |
} | |
} | |
} | |
return(-1) | |
} | |
# Use stack to store and evaluate current set of nodes | |
node_stack <- numeric() | |
# Keep note of the first node in the stack (head node) | |
current_head_node <- 1 | |
# Keep track of whether each node has been explored already | |
visited <- rep(FALSE, nodes) | |
# Iterate over all nodes | |
while (current_head_node <= nodes){ | |
# Place new head node in the stack and mark as visited | |
node_stack <- c(node_stack, current_head_node) | |
visited[current_head_node] <- TRUE | |
# Get the first child of this node | |
v <- get_child(current_head_node, current_head_node, visited) | |
while(length(node_stack) > 0){ | |
# If there are no children to explore | |
if (v == -1){ | |
# Pop node from stack | |
backtrack_node <- node_stack %>% tail(1) | |
node_stack <- node_stack %>% head(-1) | |
if (backtrack_node == current_head_node){ | |
# We've popped the head node from the stack, which is now empty | |
# Break from the loop and start a new stack with a new head node | |
visited[backtrack_node] <- FALSE | |
break | |
} | |
# Mark the popped node as not visited | |
visited[backtrack_node] <- FALSE | |
# Get new node from top of stack and get its next unvisited child | |
new_top_node <- node_stack %>% tail(1) | |
v <- get_child(backtrack_node, new_top_node, visited) | |
} else { | |
# Place new node on top of stack and mark as visited | |
node_stack <- c(node_stack, v) | |
visited[v] <- TRUE | |
# If this node connects back to the head node, we've found a cycle! | |
if (incidence(v,current_head_node)){ | |
# Check size constraints (may be unnecessary...) | |
if (length(node_stack) <= max_cycle_size){ | |
# Add cycle to list | |
cycles <- cycles + 1 | |
cycle_list[[cycles]] <- node_stack | |
} | |
} | |
if (length(node_stack) >= max_cycle_size){ | |
# If stack has reached the maximum cycle size, signal that there are no | |
# more children to explore | |
v <- -1 | |
} else { | |
# Else, grow stack with child of current node | |
v <- get_child(current_head_node, v, visited) | |
} | |
} | |
} | |
# Advance head node | |
current_head_node <- current_head_node + 1 | |
} | |
return(cycle_list) | |
} | |
# Collect all the cycles in the network | |
cycle_size <- 3 | |
cycles <- get_cycles(number_of_nodes, edges, cycle_size) | |
number_of_cycles <- length(cycles) | |
# Assign a label to each cycle | |
cycle_strings <- map_chr(cycles, function(x){ paste(x, collapse="-")}) | |
# Collect separately the size of each of the cycles in the network | |
transplants <- purrr::map_int(cycles,length) | |
data.frame(cycle_strings=cycle_strings, transplants=transplants) | |
# For a vector representing a cycle, build data frame of the edges | |
get_cycle_edges <- function(cycle_node_list){ | |
edge_list <- data.frame(X1=numeric(), X2=numeric()) | |
cycle_length <- length(cycle_node_list) | |
# Caution: FOR loop :( :( :( | |
for(i in 1:(cycle_length - 1)){ | |
edge_list <- rbind(edge_list, data.frame(X1 = cycle_node_list[i], X2 = cycle_node_list[i+1])) | |
} | |
edge_list <- rbind(edge_list, data.frame(X1 = cycle_node_list[cycle_length], X2 = cycle_node_list[1])) | |
} | |
# Get all edges that appear in cycles | |
cycle_edges <- map(cycles, get_cycle_edges) %>% | |
rbindlist %>% | |
unique %>% | |
as_tibble %>% | |
mutate(in.cycle = TRUE) | |
# Mark each edge whether it appears in a cycle or not | |
edges_enhanced <- edges %>% | |
left_join(cycle_edges,by=c("X1","X2")) %>% | |
mutate(in.cycle = !is.na(in.cycle)) | |
E(kpd)$Cycle <- edges_enhanced$in.cycle # Assign property to edge | |
head(edges_enhanced) | |
# Get all nodes that appear in cycles | |
cycle_nodes <- cycles %>% unlist %>% unique %>% sort | |
# Mark each node whether it appears in a cycle or not | |
node_in_cycle <- 1:number_of_nodes %in% cycle_nodes | |
V(kpd)$Cycle <- node_in_cycle # Assign property to node | |
# Plot network with nodes and edges in cycles highlighted | |
ggraph(kpd, layout = 'circle') + | |
geom_edge_link(aes(edge_colour=Cycle, | |
edge_width=Cycle, | |
edge_linetype=Cycle, | |
start_cap=circle(5,"mm"), | |
end_cap=circle(5,"mm")), | |
arrow=arrow(angle=20, | |
length=unit(0.1,"inches"), | |
type="closed")) + | |
geom_node_point(aes(color=Cycle),size=12,alpha=0.6,show.legend=F) + | |
geom_node_text(aes(label=1:20),size=5,color="black") + | |
scale_color_manual(values=c("red","magenta")) + | |
scale_edge_color_manual(values=c("black","magenta")) + | |
scale_edge_width_manual(values=c(0.5,1)) + | |
scale_edge_linetype_manual(values=c("dotted","solid")) + | |
theme_graph() | |
# Function returns whether or not a node appears in a cycle | |
cycle_contains_node <- function(cycle,node){ | |
purrr::map2_lgl(cycle,node, ~ .y %in% cycles[[.x]]) # A little confusing, but .x = 'cycle', .y = 'node' here | |
} | |
model <- MIPModel() %>% | |
add_variable(y[i], i = 1:number_of_cycles, type = "binary") %>% | |
add_variable(x[i,j], i = 1:number_of_cycles, j = 1:number_of_nodes, type = "binary") %>% | |
set_objective(sum_expr(transplants[i] * y[i], i = 1:number_of_cycles), "max") %>% | |
add_constraint(sum_expr(x[i,j], i = 1:number_of_cycles) <= 1, j = 1:number_of_nodes) %>% | |
add_constraint(x[i,j] == y[i], i = 1:number_of_cycles, j = 1:number_of_nodes, cycle_contains_node(i,j)) %>% | |
solve_model(with_ROI(solver = "glpk", verbosity = 1)) | |
response <- model %>% | |
get_solution(y[i]) %>% | |
select(-variable) %>% | |
mutate(cycle_string = cycle_strings, | |
transplants = transplants) | |
response | |
# Extract chosen cycles | |
chosen_cycles <- response %>% | |
filter(value==1) %>% | |
.$i | |
solution <- cycles[chosen_cycles] | |
# Collect solution edges into data frame | |
solution_edges <- map(solution, get_cycle_edges) | |
for (i in 1:length(solution_edges)){ # Save solution number | |
solution_edges[[i]] <- solution_edges[[i]] %>% | |
mutate(solution = paste("Solution Cycle",i)) | |
} | |
solution_edges <- solution_edges %>% | |
rbindlist %>% | |
as_tibble | |
# Mark edges with the solution they appear in | |
edge_in_solution <- edges %>% | |
left_join(solution_edges,by=c("X1","X2")) %>% | |
mutate(solution = ifelse(is.na(solution),"Not Selected",solution)) | |
E(kpd)$Solution <- edge_in_solution$solution | |
# Mark nodes with the solution they appear in | |
node_in_solution <- data.frame(X1=1:20) %>% | |
left_join(solution_edges, by="X1") %>% | |
mutate(solution=ifelse(is.na(solution),"Not Selected",solution)) | |
V(kpd)$Solution <- node_in_solution$solution | |
# Plot network | |
ggraph(kpd, layout = 'circle') + | |
geom_edge_link(aes(edge_colour=Solution, | |
edge_width=Solution, | |
edge_linetype=Cycle, | |
start_cap=circle(5,"mm"), | |
end_cap=circle(5,"mm")), | |
arrow=arrow(angle=20, | |
length=unit(0.1,"inches"), | |
type="closed")) + | |
geom_node_point(aes(color=Solution),size=12,alpha=0.6,show.legend = FALSE) + | |
geom_node_text(aes(label=1:20),size=5,color="black") + | |
scale_color_manual(values=c("red","magenta","blue","green")) + | |
scale_edge_color_manual(values=c("black","magenta","blue","green")) + | |
scale_edge_width_manual(values=c(0.5,rep(1,times=3))) + | |
scale_edge_linetype_manual(values=c("dotted",rep("solid",times=3))) + | |
theme_graph() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment