Skip to content

Instantly share code, notes, and snippets.

@jalapic
Last active August 29, 2015 14:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jalapic/6d175ffe87463d1f332e to your computer and use it in GitHub Desktop.
Save jalapic/6d175ffe87463d1f332e to your computer and use it in GitHub Desktop.
Markov Chains in DiagrammeR
### Markov Chain Diagrams Using DiagrammeR
### Introduction and Sample Data
#' Imagine a sequence of behaviors like below, where each letter (A,I,O,R,S,X,Y) refers to
#' a distinct behavior.
#' AOXXYXXXXXXYXXYXXXXXYXXXXXYXSXXXXAXAOOOXAAAOYXXXXXXSXXXXSXXYXYXXYXXYXXXXXXXXXYXXAAAAAAOAA
#' AOAAAOAAAAAOAAAAAAAAAAAOAAAOAAAOOAAAOAAAAAOOIAOAOAOIAOOOAAARSAAOOOAAAAOAAAOOAOOOAOAAAISAA
#' ARAOOAAAOASAAAAAAOOOAAAAOASOAAAOARSRSOAAOOOAOAAOAAOAAOOAAAAAASAOIASAAOAAOOARASOOARSAOOAAA
#' AOAAAAOAOAAAAAAASAAAAOIARSARARAOAAAOOOAAOAAOOAAOOAOAAOIAOOOAOOAAAAAAROSIOAAAOOAAAAOAOOAAO
#' OAAAOOAOAAASOOAOAAAOAOSASOOAOOAROOAROSAOSASOAROOAAAOSOAOOSOAAOSOOAOAAAOOOAOOASOAOSAAAASIO
#' OAOOAAOOAOORSIAOOOAASAOOOAOSOAOARSIXXYXXXXXXXXXXXXXSXXSXXXXXXSYXXXYXXXXXSXXXXXXXXXXXXXXSX
#' YXXYXXXXXS
#' The basic aim is to visualize this sequence in a Markov Chain diagram. Specifcally, we
#' want to show those behavioral transitions that occur at a higher frequency than chance, and
#' we wish to denote behaviors that occur more frequently in some way (e.g. make those nodes
#' bigger, thicker or a different color - or all 3 of these). Likewise, the edges should
#' reflect the weight or likelihood that a behavioral sequence transition occurs.
#' The following are some raw data.
#'
#' Firstly, this is the frequency of each behavior above:
ty<-structure(c(218, 10, 139, 15, 37, 108, 16), .Names = c("A", "I", "O", "R", "S", "X", "Y"))
ty
#' A I O R S X Y
#' 218 10 139 15 37 108 16
#' Secondly, here is a dataframe showing the transition rate between two behaviors and the
#' associated p-value of that transition.
#'
tx1<-
structure(list(Var1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("A", "I",
"O", "R", "S", "X", "Y"), class = "factor"), Var2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L
), .Label = c("A", "I", "O", "R", "S", "X", "Y"), class = "factor"),
value = c(0.541284403669725, 0.6, 0.517985611510791, 0.266666666666667,
0.351351351351351, 0.037037037037037, 0, 0.00458715596330275,
0, 0.0359712230215827, 0, 0.108108108108108, 0, 0, 0.334862385321101,
0.2, 0.352517985611511, 0.266666666666667, 0.297297297297297,
0, 0, 0.0596330275229358, 0, 0.00719424460431655, 0, 0.027027027027027,
0, 0, 0.055045871559633, 0.1, 0.0647482014388489, 0.466666666666667,
0, 0.0833333333333333, 0, 0.00458715596330275, 0.1, 0.0143884892086331,
0, 0.189189189189189, 0.75, 1, 0, 0, 0.00719424460431655,
0, 0.027027027027027, 0.12962962962963, 0), p = c(0, 0, 0,
0, 0, 0, 0.01, 0.016, 0.665, 0.081, 0.595, 0.589, 0.134,
0.582, 0.09, 0, 0.009, 0.005, 0, 0, 0.021, 0.003, 0.596,
0.044, 0.515, 0.083, 0.054, 0.501, 0, 0, 0.013, 0.141, 0.091,
0.039, 0.228, 0, 0.155, 0, 0.08, 0, 0, 0, 0.001, 0.584, 0.019,
0.501, 0.273, 0.614, 0.487)), .Names = c("Var1", "Var2",
"value", "p"), row.names = c(NA, -49L), class = "data.frame")
head(tx1,10)
#' Var1 Var2 value p
#' 1 A A 0.541284404 0.000
#' 2 I A 0.600000000 0.000
#' 3 O A 0.517985612 0.000
#' 4 R A 0.266666667 0.000
#' 5 S A 0.351351351 0.000
#' 6 X A 0.037037037 0.000
#' 7 Y A 0.000000000 0.010
#' 8 A I 0.004587156 0.016
#' 9 I I 0.000000000 0.665
#' 10 O I 0.035971223 0.081
#' Var1 = first behavior, Var2 = second behavior
#' value = transition probability
#' p = probability that transition probability is significant
#####################
##### PLOTTING
#' Some general points/issues:
#'
#' 1. I have utilized 'x' and 'y' fixed plotting of neato. I want to show all nodes
#' in a series of plots comparing Markov Chains of different sequences. Therefore
#' it is better to have all nodes in the same position for all diagrams.
#' I have tried to pick coordinates that eliminate line crossings, but....
#'
#' 2. Self-loops e.g. A->A and X->X cause the biggest issue of clarity. Is there a way
#' of altering how they are positioned?
#'
#' 3. I don't seem to be able to scale node size by some data. (not shown below)
#'
#' 4. I don't seem to be able to scale nodes by two attributes (e.g. color and penwidth)
#'
#' 5. I can't get render_graph() to work on my machine. I used a workaround (below).
#-------------------------------------------------------------------------------------------------------------------------------------------#
library(dplyr)
#### EXAMPLE 1 - SCALE EDGES
#' I scale edges by transition probability (value in tx1)
# Create a node data frame
nodes <-
create_nodes(nodes = names(ty),
type = "letter",
x=c(1.75,0.25,2.9,3.25,0.6,1,2.5),
y=c(3,3,2,3,2,1,1))
# Create an edges data frame
edgesx <- tx1 %>% filter(value!=0 & p<.005)
edges <- create_edges(from = edgesx$Var1,
to = edgesx$Var2,
label = '',
relationship = "letter_to_letter",
data = edgesx$value)
edges <- scale_edges(edges_df = edges,
to_scale = edges$data,
edge_attr = "penwidth",
range = c(1, 3))
edges
graph <-
create_graph(nodes_df = nodes,
edges_df = edges,
graph_attrs = "layout = neato",
node_attrs = c("fontname = Helvetica",
"style = filled"),
edge_attrs = c("color = gray20",
"arrowsize = 0.5"))
# View the graph in the RStudio Viewer
#render_graph(graph) #for some reason this doesn't produce a viz in the viewer on my machine. There is no error message, it just doesn't show.
grViz(print(render_graph(graph, output = "DOT"))) #this works
#-------------------------------------------------------------------------------------------------------------------------------------------#
#### EXAMPLE 2 - SCALE NODES AND EDGES
#' I scale edges by transition probability (value in tx1)
#' I scale nodes by frequency of occurrence (ty) - penwidth
# Create a node data frame
nodes <-
create_nodes(nodes = names(ty),
type = "letter",
x=c(1.75,0.25,2.9,3.25,0.6,1,2.5),
y=c(3,3,2,3,2,1,1),
data=ty)
nodes <- scale_nodes(nodes_df = nodes,
to_scale = nodes$data,
node_attr = "penwidth",
range = c(2, 5))
# Create an edges data frame
edgesx <- tx1 %>% filter(value!=0 & p<.005)
edges <- create_edges(from = edgesx$Var1,
to = edgesx$Var2,
label = '',
relationship = "letter_to_letter",
data = edgesx$value)
edges <- scale_edges(edges_df = edges,
to_scale = edges$data,
edge_attr = "penwidth",
range = c(1, 5))
edges
graph <-
create_graph(nodes_df = nodes,
edges_df = edges,
graph_attrs = "layout = neato",
node_attrs = c("fontname = Helvetica",
"style = filled"),
edge_attrs = c("color = gray20",
"arrowsize = 0.5"))
# View the graph in the RStudio Viewer
#render_graph(graph) #for some reason this doesn't produce a viz in the viewer on my machine. There is no error message, it just doesn't show.
grViz(print(render_graph(graph, output = "DOT"))) #this works
#-------------------------------------------------------------------------------------------------------------------------------------------#
#### EXAMPLE 3 - NODE AND EDGE SCALING
#' here I scale the edgewith based on transition probablity (value in tx1)
#' I scale the node based on color (but I do this manually - I couldn't work the
#' diagrammer way of doing this)
#' I also try to scale node by penwidth but it doesn't work.
# Create a node data frame
nodes <-
create_nodes(nodes = names(ty),
type = "letter",
shape= "square",
x=c(1.75,0.25,2.9,3.25,0.6,1,2.5),
y=c(3,3,2,3,2,1,1),
data=ty)
nodes$color<-ifelse(nodes$data>100, "Red", "SandyBrown")
nodes <- scale_nodes(nodes_df = nodes,
to_scale = nodes$data,
node_attr = "penwidth",
range = c(2, 5))
# Create an edges data frame
edgesx <- tx1 %>% filter(value!=0 & p<.005)
edges <- create_edges(from = edgesx$Var1,
to = edgesx$Var2,
label = '',
relationship = "letter_to_letter",
data = edgesx$value)
edges <- scale_edges(edges_df = edges,
to_scale = edges$data,
edge_attr = "penwidth",
range = c(1, 5))
edges
graph <-
create_graph(nodes_df = nodes,
edges_df = edges,
graph_attrs = "layout = neato",
node_attrs = c("fontname = Helvetica",
"style = filled"),
edge_attrs = c("color = gray20",
"arrowsize = 0.5"))
# View the graph in the RStudio Viewer
#render_graph(graph) #for some reason this doesn't produce a viz in the viewer on my machine. There is no error message, it just doesn't show.
grViz(print(render_graph(graph, output = "DOT"))) #this works
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment