Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timelyportfolio/b00ffe3d850ff5c9129d to your computer and use it in GitHub Desktop.
Save timelyportfolio/b00ffe3d850ff5c9129d to your computer and use it in GitHub Desktop.
library("igraph")
library("plyr")
library("HiveR")
library("RColorBrewer")
############################################################################################
rm(list = ls())
dataSet <- read.table("./inst/lesmis.txt", header = FALSE, sep = "\t")
############################################################################################
# Create a graph. Use simplify to ensure that there are no duplicated edges or self loops
gD <- simplify(graph.data.frame(dataSet, directed=FALSE))
# Print number of nodes and edges
# vcount(gD)
# ecount(gD)
# Calculate some node properties and node similarities that will be used to illustrate
# different plotting abilities
# Calculate degree for all nodes
degAll <- degree(gD, v = V(gD), mode = "all")
# Calculate betweenness for all nodes
betAll <- betweenness(gD, v = V(gD), directed = FALSE) / (((vcount(gD) - 1) * (vcount(gD)-2)) / 2)
betAll.norm <- (betAll - min(betAll))/(max(betAll) - min(betAll))
node.list <- data.frame(name = V(gD)$name, degree = degAll, betw = betAll.norm)
# Calculate Dice similarities between all pairs of nodes
dsAll <- similarity.dice(gD, vids = V(gD), mode = "all")
# Calculate edge weight based on the node similarity
F1 <- function(x) {data.frame(V4 = dsAll[which(V(gD)$name == as.character(x$V1)), which(V(gD)$name == as.character(x$V2))])}
dataSet.ext <- ddply(dataSet, .variables=c("V1", "V2", "V3"), function(x) data.frame(F1(x)))
rm(degAll, betAll, betAll.norm, F1)
############################################################################################
#Determine node/edge color based on the properties
# Calculate node size
# We'll interpolate node size based on the node betweenness centrality, using the "approx" function
# And we will assign a node size for each node based on its betweenness centrality
approxVals <- approx(c(0.5, 1.5), n = length(unique(node.list$bet)))
nodes_size <- sapply(node.list$bet, function(x) approxVals$y[which(sort(unique(node.list$bet)) == x)])
node.list <- cbind(node.list, size = nodes_size)
rm(approxVals, nodes_size)
# Define node color
# We'll interpolate node colors based on the node degree using the "colorRampPalette" function from the "grDevices" library
library("grDevices")
# This function returns a function corresponding to a collor palete of "bias" number of elements
F2 <- colorRampPalette(c("#F5DEB3", "#FF0000"), bias = length(unique(node.list$degree)), space = "rgb", interpolate = "linear")
# Now we'll create a color for each degree
colCodes <- F2(length(unique(node.list$degree)))
# And we will assign a color for each node based on its degree
nodes_col <- sapply(node.list$degree, function(x) colCodes[which(sort(unique(node.list$degree)) == x)])
node.list <- cbind(node.list, color = nodes_col)
rm(F2, colCodes, nodes_col)
# Assign visual attributes to edges using the same approach as we did for nodes
F2 <- colorRampPalette(c("#FFFF00", "#006400"), bias = length(unique(dataSet.ext$V4)), space = "rgb", interpolate = "linear")
colCodes <- F2(length(unique(dataSet.ext$V4)))
edges_col <- sapply(dataSet.ext$V4, function(x) colCodes[which(sort(unique(dataSet.ext$V4)) == x)])
dataSet.ext <- cbind(dataSet.ext, color = edges_col)
rm(F2, colCodes, edges_col)
############################################################################################
# Assign nodes to axes
# Randomly
nodeAxis <- sample(3, nrow(node.list), replace = TRUE )
node.list <- cbind(node.list, axis = nodeAxis)
rm(nodeAxis)
############################################################################################
#Create a hive plot
source("mod.edge2HPD.R")
hive1 <- mod.edge2HPD(edge_df = dataSet.ext[, 1:2], edge.weight = dataSet.ext[, 3], edge.color = dataSet.ext[, 5], node.color = node.list[,c("name", "color")], node.size = node.list[,c("name", "size")], node.radius = node.list[,c("name", "degree")], node.axis = node.list[,c("name", "axis")])
#sumHPD(hive1)
hive2 <- mineHPD(hive1, option = "remove zero edge")
plotHive(hive2, method = "abs", bkgnd = "white", axLab.pos = 1)
########################################
# Based on hierarchical cluestering
d <- dist(dsAll)
hc <- hclust(d)
#plot(hc)
nodeAxis <- cutree(hc, k = 6)
node.list <- cbind(node.list, axisCl = nodeAxis)
rm(nodeAxis)
hive1 <- mod.edge2HPD(edge_df = dataSet.ext[, 1:2], edge.weight = dataSet.ext[, 3], edge.color = dataSet.ext[, 5], node.color = node.list[,c("name", "color")], node.size = node.list[,c("name", "size")], node.radius = node.list[,c("name", "degree")], node.axis = node.list[,c("name", "axisCl")])
#sumHPD(hive1)
hive2 <- mineHPD(hive1, option = "remove zero edge")
plotHive(hive2, method = "abs", bkgnd = "white", axLab.pos = 1)
mod.edge2HPD <- function(edge_df = NULL, unique.rows = TRUE, axis.cols = NULL, type = "2D", desc = NULL, edge.weight = NULL, edge.color = NULL, node.color = NULL, node.size = NULL, node.radius = NULL, node.axis = NULL)
{
#edge.weight - a list corresponding to edge weights (same order as in edge_df)
#edge.color - a lis corresponding to edge colors (same order as in edge_df)
#node.color - a data frame consisting of two columns: column 1 - node labels, column 2 - node color
#node.size - a data frame consisting of two columns: column 1 - node labels, column 2 - node size
#node.radius - a data frame consisting of two columns: column 1 - node labels, column 2 - node radius
#node.axis - a data frame consisting of two columns: column 1 - node labels, column 2 - node axis
if (is.null(edge_df)){
stop("No edge data provided")
}
if (!is.data.frame(edge_df)){
stop("edge_df is not a data frame")
}
if (unique.rows)
{
nr.old <- nrow(edge_df)
edge_df <- unique(edge_df)
if (nr.old > nrow(edge_df))
cat("\n\t", nr.old - nrow(edge_df), "non-unique data-frame rows removed!\n\n")
}
# Get node labels
lab1 <- as.character(unlist(edge_df[, 1]))
lab2 <- as.character(unlist(edge_df[, 2]))
# Get number of unique nodes
nn <- length(unique(c(lab1, lab2)))
# Define node ID
id <- 1:nn
# Define node label
label <- unique(c(lab1, lab2))
# Create a data frame for node attributes
node.attributes <- data.frame(id, label)
####################################################
# Node size definition
if (!is.null(node.size))
{
if (is.numeric(node.size[, 2]) | is.integer(node.size[, 2]))
{
nSize <- c()
for (i in 1:length(label))
{
indx <- which(as.character(node.size[,1]) == label[i])
if (length(indx[1]) != 0)
nSize = c(nSize, node.size[indx[1],2])
else
{
msg <- paste("No size data provided for the node ", nodes$id[n], ". Value 1 will be assigned to this node!", sep = "")
warning(msg)
nSize = c(nSize, 1)
}
}
node.attributes <- cbind(node.attributes, size = nSize)
rm(i, nSize, indx)
}#is.numeric
else{
stop("Node size is not numeric or integer.")
}
}#is.null
if (is.null(node.size))
{
warning("No data provided for the node size. All nodes will be assigned size 1!")
node.attributes <- cbind(node.attributes, size = rep(1, nn))
}
####################################################
# Node color definition
if (!is.null(node.color))
{
nCol <- c()
for (i in 1:length(label))
{
indx <- which(as.character(node.color[,1]) == label[i])
if (length(indx[1]) != 0)
nCol = c(nCol, as.character(node.color[indx[1],2]))
else
{
msg <- paste("No color data provided for the node ", nodes$id[n], ". Black color will be assigned to this node!", sep = "")
warning(msg)
nCol = c(nCol, "black")
}
}
node.attributes <- cbind(node.attributes, color = nCol)
rm(i, nCol, indx)
}#is.null
if (is.null(node.color))
{
warning("No data provided for the node color. All nodes will be colored black!")
node.attributes <- cbind(node.attributes, color = as.character(rep("black", nn)))
}
####################################################
# Node radius definition
if (!is.null(node.radius))
{
if (is.numeric(node.radius[, 2]) | is.integer(node.radius[, 2]))
{
nSize <- c()
for (i in 1:length(label))
{
indx <- which(as.character(node.radius[,1]) == label[i])
if (length(indx[1]) != 0)
nSize = c(nSize, node.radius[indx[1],2])
else
{
msg <- paste("No raidus data provided for the node ", nodes$id[n], ". Random values will be assigned!", sep = "")
warning(msg)
nSize = c(nSize, sample(nn, 1))
}
}
node.attributes <- cbind(node.attributes, radius = nSize)
rm(i, nSize, indx)
}#is.numeric
else{
stop("Node raidus is not integer.")
}
}#is.null
if (is.null(node.radius))
{
warning("No data provided for the node radius. All nodes will be assigned random radius values")
node.attributes <- cbind(node.attributes, radius = sample(nn, nn))
}
####################################################
# Node axis definition
if (!is.null(node.axis))
{
if (is.integer(node.axis[, 2]))
{
nSize <- c()
for (i in 1:length(label))
{
indx <- which(as.character(node.axis[,1]) == label[i])
if (length(indx[1]) != 0)
nSize = c(nSize, node.axis[indx[1],2])
else
{
msg <- paste("No axis data provided for the node ", nodes$id[n], ". This node will be assigned to axis 1!", sep = "")
warning(msg)
nSize = c(nSize, 1)
}
}
node.attributes <- cbind(node.attributes, axis = nSize)
rm(i, nSize, indx)
}#is.integer
else{
stop("Node axis is not integer.")
}
}#is.null
if (is.null(node.axis))
{
warning("No data provided for the node axis. All nodes will be assigned to axis 1")
node.attributes <- cbind(node.attributes, axis = rep(1, nn))
}
######################################################
# Create HPD object
HPD <- list()
# Define node attributes
HPD$nodes$id <- as.integer(node.attributes$id)
HPD$nodes$lab <- as.character(node.attributes$label)
HPD$nodes$axis <- as.integer(node.attributes$axis)
HPD$nodes$radius <- as.numeric(node.attributes$radius)
HPD$nodes$size <- as.numeric(node.attributes$size)
HPD$nodes$color <- as.character(node.attributes$color)
####################################################
# Get number of edges
ne <- nrow(edge_df)
####################################################
# Edge weight definition
if (!(is.null(edge.weight)))
{
if (length(edge.weight) != nrow(edge_df))
stop("Edge weights are not provided for all edges!")
if (is.numeric(edge.weight) | is.integer(edge.weight))
edge_df <- cbind(edge_df, weight = edge.weight)
else
stop("Edge weight column is not numeric or integer.")
}
if (is.null(edge.weight))
{
warning("No edge weight provided Setting default edge weight to 1")
edge_df <- cbind(edge_df, weight = rep(1, ne))
}
####################################################
# Edge color definition
if (!(is.null(edge.color)))
{
if (length(edge.color) != nrow(edge_df))
stop("Edge colors are not provided for all edges!")
else
edge_df <- cbind(edge_df, color = as.character(edge.color))
}
if (is.null(edge.color))
{
warning("No edge color provided. Setting default edge color to gray")
edge_df <- cbind(edge_df, color = rep("gray", ne))
}
####################################################
# Set up edge list
# Merge by default sorts things and changes the order of edges, so edge list has to stay paired
edge.hlp <- merge(edge_df, node.attributes[, 1:2], by.x = 1, by.y = "label")
edge <- merge(edge.hlp, node.attributes[1:2], by.x = 2, by.y = "label")
HPD$edges$id1 <- as.integer(edge$id.x)
HPD$edges$id2 <- as.integer(edge$id.y)
HPD$edges$weight <- as.numeric(edge$weight)
HPD$edges$color <- as.character(edge$color)
HPD$nodes <- as.data.frame(HPD$nodes)
HPD$edges <- as.data.frame(HPD$edges)
# Add description
if (is.null(desc)) {
desc <- "No description provided"
}
HPD$desc <- desc
# Define axis columns
if (is.null(axis.cols)){
axis.cols <- brewer.pal(length(unique(HPD$nodes$axis)), "Set1")
}
HPD$axis.cols <- axis.cols
HPD$nodes$axis <- as.integer(HPD$nodes$axis)
HPD$nodes$size <- as.numeric(HPD$nodes$size)
HPD$nodes$color <- as.character(HPD$nodes$color)
HPD$nodes$lab <- as.character(HPD$nodes$lab)
HPD$nodes$radius <- as.numeric(HPD$nodes$radius)
HPD$nodes$id <- as.integer(HPD$nodes$id)
HPD$edges$id1 <- as.integer(HPD$edges$id1)
HPD$edges$id2 <- as.integer(HPD$edges$id2)
HPD$edges$weight <- as.numeric(HPD$edges$weight)
HPD$edges$color <- as.character(HPD$edges$color)
HPD$type <- type
class(HPD) <- "HivePlotData"
# Check HPD object
chkHPD(HPD)
return (HPD)
}
mod.mineHPD <- function(HPD, option = "", radData = NULL)
{
edges <- HPD$edges
nodes <- HPD$nodes
nn <- length(nodes$id)
### ++++++++++++++++++++++++++++++++++++++++++++++++++++ ###
if (option == "axis <- source.man.sink") {
# A change that allows this function to be used for undirected graphs
# Now all nodes will be assigned to an axis
done <- FALSE # a check to make sure all nodes get an axis
for (n in 1:nn) {
id1 <- which(n ==edges$id1)
id2 <- which(n ==edges$id2)
if ((length(id1) == 0) & (length(id2) > 0 )) {
nodes$axis[n] <- 2
done <- TRUE
next
} # these are sinks, as they only receive an edge
# note that set operations below drop duplicate values
#Change 1 starts here
if (length(id1) > 0)
{
if (length(id2) == 0)
{
nodes$axis[n] <- 1
done <- TRUE
next
}
else
{
#Change 1 ends here
common <- union(id1, id2)
source <- setdiff(id1, common)
if (length(source) == 1) {
nodes$axis[n] <- 1
done <- TRUE
next
} # these are sources
if (length(common) >= 1) {
nodes$axis[n] <- 3
done <- TRUE
next
} # these are managers
}
}
if (!done) {
msg <- paste("node ", nodes$id[n], " was not assigned to an axis", sep = "")
warning(msg)
} # alert the user there was a problem
} # end of loop inspecting nodes
nodes$axis <- as.integer(nodes$axis)
} ##### end of option == "axis <- source.man.sink
### ++++++++++++++++++++++++++++++++++++++++++++++++++++ ###
if (option == "rad <- random") {
# This option assigns a random radius value to a node
for (n in 1:nn)
nodes$radius[n] <- sample(1:nn, 1)
} ##### end of option == "rad <- random"
### ++++++++++++++++++++++++++++++++++++++++++++++++++++ ###
if (option == "rad <- userDefined") {
# This option assigns a radius value to a node
# based upon user specified values.
if (is.null(radData)){
stop("No edge data provided")
}
if (length(intersect(as.character(radData[,1]), as.character(nodes$lab))) == 0){
stop("Provided data does not contain correct node labels")
}
for (n in 1:nn)
{
indexHlp <- which(as.character(radData[,1]) == nodes$lab[n])
if (length(indexHlp) != 0)
nodes$radius[n] <- radData[indexHlp[1], 2]
else
{
msg <- paste("No data provided for the node ", nodes$id[n], ". Value 1 will be assigned to this node!", sep = "")
warning(msg)
nodes$radius[n] <- 1
}
}
} ##### end of option == "rad <- userDefined"
### ++++++++++++++++++++++++++++++++++++++++++++++++++++ ###
if (option == "axis <- deg_one_two_more")
{
# This option assigns a node to an axis
# based upon whether its degree is 1, 2, or greater than two
#
# degree 1 = axis 1, degree 2 = axis 2, degree >2 = axis3
done <- FALSE # a check to make sure all nodes get an axis
for (n in 1:nn)
{
id1 <- which(n ==edges$id1)
id2 <- which(n ==edges$id2)
if ((length(id1) + length(id2)) == 1)
{
nodes$axis[n] <- 1
done <- TRUE
next
}
if ((length(id1) + length(id2)) == 2)
{
nodes$axis[n] <- 2
done <- TRUE
next
}
if ((length(id1) + length(id2)) > 2)
{
nodes$axis[n] <- 3
done <- TRUE
next
}
if (!done) {
msg <- paste("node ", nodes$id[n], " was not assigned to an axis", sep = "")
warning(msg)
} # alert the user there was a problem
} # end of loop inspecting nodes
nodes$axis <- as.integer(nodes$axis)
} ##### end of option == "axis <- deg_1_2_more
### ++++++++++++++++++++++++++++++++++++++++++++++++++++ ###
if (option == "axis <- deg_five_ten_more")
{
# This option assigns a node to an axis
# based upon whether its degree is <=5, 6-10, or greater than 10
#
# degree <=5 = axis 1, degree between 6 and 10 = axis 2, degree >10 = axis32
done <- FALSE # a check to make sure all nodes get an axis
for (n in 1:nn)
{
id1 <- which(n ==edges$id1)
id2 <- which(n ==edges$id2)
if ((length(id1) + length(id2)) <= 5)
{
nodes$axis[n] <- 1
done <- TRUE
next
}
if (((length(id1) + length(id2)) > 5) & ((length(id1) + length(id2)) <= 10))
{
nodes$axis[n] <- 2
done <- TRUE
next
}
if ((length(id1) + length(id2)) > 10)
{
nodes$axis[n] <- 3
done <- TRUE
next
}
if (!done) {
msg <- paste("node ", nodes$id[n], " was not assigned to an axis", sep = "")
warning(msg)
} # alert the user there was a problem
} # end of loop inspecting nodes
nodes$axis <- as.integer(nodes$axis)
} ##### end of option == "axis <- deg_five_ten_more"
### ++++++++++++++++++++++++++++++++++++++++++++++++++++ ###
if (option == "remove axis edge") {
# This option removes edges which start and end on the same axis
# It re-uses code from sumHPD
# Create a list of edges to be drawn
n1.lab <- n1.rad <- n2.lab <- n2.rad <- n1.ax <- n2.ax <- c()
for (n in 1:(length(HPD$edges$id1))) {
i1 <- which(HPD$edges$id1[n] == HPD$nodes$id)
i2 <- which(HPD$edges$id2[n] == HPD$nodes$id)
n1.lab <- c(n1.lab, HPD$nodes$lab[i1])
n2.lab <- c(n2.lab, HPD$nodes$lab[i2])
n1.rad <- c(n1.rad, HPD$nodes$radius[i1])
n2.rad <- c(n2.rad, HPD$nodes$radius[i2])
n1.ax <- c(n1.ax, HPD$nodes$axis[i1])
n2.ax <- c(n2.ax, HPD$nodes$axis[i2])
}
fd <- data.frame(
n1.id = HPD$edges$id1,
n1.ax,
n1.lab,
n1.rad,
n2.id = HPD$edges$id2,
n2.ax,
n2.lab,
n2.rad,
e.wt = HPD$edges$weight,
e.col = HPD$edges$color)
prob <- which(fd$n1.ax == fd$n2.ax)
if (length(prob) == 0) cat("\n\t No edges were found that start and end on the same axis\n")
if (length(prob) > 0) {
edges <- edges[-prob,]
cat("\n\t", length(prob), "edges that start and end on the same axis were removed\n")
}
} ##### end of option == "remove axis edge"
### ++++++++++++++++++++++++++++++++++++++++++++++++++++ ###
if (option == "axis <- split") {
# This option splits all axes into 2 new axes
# It can be used to address the "edge on the same axis" issue
# This option may increase the number of nodes - a single node from the parent axis may appear on 2 "children" axes
nodesNew <- nodes
nodesOld <- nodes
nAxes <- unique(nodes$axis)
numAxes <- length(nAxes)
#Renumerate axes
for (i in numAxes:1)
nodesOld[which(nodesOld$axis == nAxes[i]), "axis"] <- as.integer(2*nAxes[i] - 1)
#Duplicate nodes
#Renumerate axes
for (i in numAxes:1)
nodesNew[which(nodesNew$axis == nAxes[i]), "axis"] <- as.integer(2*nAxes[i])
#Re-numerate node ids
nodesNew$id <- nodesNew$id + nn
#Duplicated set of nodes with correct axis and node ids
nodes <- rbind(nodesOld, nodesNew)
rm(nodesOld, nodesNew)
#Now create duplicated set of edges and re-numerate node ids for interactions
edgesNew1 <- edges
edgesNew1$id1 <- edgesNew1$id1 + nn
edgesNew1$id2 <- edgesNew1$id2 + nn
edgesNew2 <- edges
edgesNew2$id1 <- edgesNew2$id1 + nn
edgesNew3 <- edges
edgesNew3$id2 <- edgesNew3$id2 + nn
edges <- rbind(edges, edgesNew1, edgesNew2, edgesNew3)
nodesAxis <- nodes[, c("id", "axis")]
edgesHlp <- merge(edges, nodesAxis, by.x = "id1", by.y = "id")
edges <- merge(edgesHlp, nodesAxis, by.x = "id2", by.y = "id")
edgesOK <- edges[((edges$axis.x == 1) & (edges$axis.y == 2*numAxes)) | ((edges$axis.x == 2*numAxes) & (edges$axis.y == 1)), ]
edgesHlp <- edgesOK
if (numAxes > 1)
for (i in 1:(numAxes - 1))
{
edgesOK <- edges[((edges$axis.x == 2*i) & (edges$axis.y == (2*i + 1))) | ((edges$axis.x == (2*i + 1)) & (edges$axis.y == 2*i)), ]
edgesHlp <- rbind(edgesHlp, edgesOK)
}
for (i in 1:numAxes)
{
edgesOK <- edges[((edges$axis.x == (2*i - 1)) & (edges$axis.y == 2*i)) | ((edges$axis.x == 2*i) & (edges$axis.y == (2*i - 1))), ]
edgesHlp <- rbind(edgesHlp, edgesOK)
}
edges <- edgesHlp[, 1:4]
unique.ids <- unique(c(edges$id1, edges$id2))
nodes <- nodes[nodes$id %in% unique.ids, ]
# Check if the new number of axes is 2 times larger than old one
# if not, we need to adjust axis numbers
nodesAxis.new <- sort(unique(nodes$axis))
if(length(nodesAxis.new) != 2*numAxes)
for (i in 1:length(nodesAxis.new))
if (i != nodesAxis.new[i]){
nodes[which(nodes$axis == nodesAxis.new[i]), "axis"] <- i
}
} ##### end of option == "axis <- split"
### ++++++++++++++++++++++++++++++++++++++++++++++++++++ ###
# Final assembly and checking...
HPD$edges <- edges
HPD$nodes <- nodes
chkHPD(HPD)
HPD
}
library("igraph")
library("plyr")
library("HiveR")
library("RColorBrewer")
############################################################################################
rm(list = ls())
dataSet <- read.table("./inst/lesmis.txt", header = FALSE, sep = "\t")
############################################################################################
# Create a graph. Use simplify to ensure that there are no duplicated edges or self loops
gD <- simplify(graph.data.frame(dataSet, directed=FALSE))
# Print number of nodes and edges
# vcount(gD)
# ecount(gD)
# Calculate some node properties and node similarities that will be used to illustrate
# different plotting abilities
# Calculate degree for all nodes
degAll <- degree(gD, v = V(gD), mode = "all")
# Calculate betweenness for all nodes
betAll <- betweenness(gD, v = V(gD), directed = FALSE) / (((vcount(gD) - 1) * (vcount(gD)-2)) / 2)
betAll.norm <- (betAll - min(betAll))/(max(betAll) - min(betAll))
gD <- set.vertex.attribute(gD, "degree", index = V(gD), value = degAll)
gD <- set.vertex.attribute(gD, "betweenness", index = V(gD), value = betAll.norm)
# Check the attributes
# summary(gD)
gD <- set.edge.attribute(gD, "weight", index = E(gD), value = 0)
gD <- set.edge.attribute(gD, "similarity", index = E(gD), value = 0)
# Calculate Dice similarities between all pairs of nodes
dsAll <- similarity.dice(gD, vids = V(gD), mode = "all")
# Calculate edge weight based on the node similarity
F1 <- function(x) {data.frame(V4 = dsAll[which(V(gD)$name == as.character(x$V1)), which(V(gD)$name == as.character(x$V2))])}
dataSet.ext <- ddply(dataSet, .variables=c("V1", "V2", "V3"), function(x) data.frame(F1(x)))
for (i in 1:nrow(dataSet.ext))
{
E(gD)[as.character(dataSet.ext$V1) %--% as.character(dataSet.ext$V2)]$weight <- as.numeric(dataSet.ext$V3)
E(gD)[as.character(dataSet.ext$V1) %--% as.character(dataSet.ext$V2)]$similarity <- as.numeric(dataSet.ext$V4)
}
rm(degAll, betAll, betAll.norm, F1, dsAll, i)
############################################################################################
#Determine node/edge color based on the properties
# Calculate node size
# We'll interpolate node size based on the node betweenness centrality, using the "approx" function
# And we will assign a node size for each node based on its betweenness centrality
approxVals <- approx(c(0.5, 1.5), n = length(unique(V(gD)$betweenness)))
nodes_size <- sapply(V(gD)$betweenness, function(x) approxVals$y[which(sort(unique(V(gD)$betweenness)) == x)])
rm(approxVals)
# Define node color
# We'll interpolate node colors based on the node degree using the "colorRampPalette" function from the "grDevices" library
library("grDevices")
# This function returns a function corresponding to a collor palete of "bias" number of elements
F2 <- colorRampPalette(c("#F5DEB3", "#FF0000"), bias = length(unique(V(gD)$degree)), space = "rgb", interpolate = "linear")
# Now we'll create a color for each degree
colCodes <- F2(length(unique(V(gD)$degree)))
# And we will assign a color for each node based on its degree
nodes_col <- sapply(V(gD)$degree, function(x) colCodes[which(sort(unique(V(gD)$degree)) == x)])
rm(F2, colCodes)
# Assign visual attributes to edges using the same approach as we did for nodes
F2 <- colorRampPalette(c("#FFFF00", "#006400"), bias = length(unique(E(gD)$similarity)), space = "rgb", interpolate = "linear")
colCodes <- F2(length(unique(E(gD)$similarity)))
edges_col <- sapply(E(gD)$similarity, function(x) colCodes[which(sort(unique(E(gD)$similarity)) == x)])
rm(F2, colCodes)
############################################################################################
# Now the new (HiveR) part
# Create a hive plot from the data frame
hive1 <- edge2HPD(edge_df = dataSet.ext)
#sumHPD(hive1)
# Assign nodes to a radius based on their degree (number of edges they are touching)
hive2 <- mineHPD(hive1, option = "rad <- tot.edge.count")
# Assign nodes to axes based on their position in the edge list
# (this function assumes direct graphs, so it considers the first column to be a source and second column to be a sink )
hive3 <- mineHPD(hive2, option = "axis <- source.man.sink")
# Removing zero edges for better visualization
hive4 <- mineHPD(hive3, option = "remove zero edge")
# And finally, plotting our graph (Figure 1)
plotHive(hive4, method = "abs", bkgnd = "white", axLabs = c("source", "hub", "sink"), axLab.pos = 1)
############################################################################################
# Let's do some node/edge customization
# First do nodes
nodes <- hive4$nodes
# Change the node color and size based on node degree and betweenness values
for (i in 1:nrow(nodes))
{
nodes$color[i] <- nodes_col[which(nodes$lab[i] == V(gD)$name)]
nodes$size[i] <- nodes_size[which(nodes$lab[i] == V(gD)$name)]
}
# Reassign these nodes to the hive(4) object
hive4$nodes <- nodes
# And plot it (Figure 2)
plotHive(hive4, method = "abs", bkgnd = "white", axLab.pos = 1)
# Now do the edges
edges <- hive4$edges
# Change the edge color based on Dice similarity
for (i in 1:nrow(edges))
{
index1 <- which(nodes$id == edges$id1[i])
index2 <- which(nodes$id == edges$id2[i])
edges$color[i] <- edges_col[which(E(gD)[as.character(nodes$lab[index1]) %--% as.character(nodes$lab[index2])] == E(gD))]
}
# Reassign these edges to the hive(4) object
hive4$edges <- edges
# And plot it (Figure 3)
plotHive(hive4, method = "abs", bkgnd = "white", axLabs = c("source", "hub", "sink"), axLab.pos = 1)
# Some edges are too thick, so we will reduce the edge weight (thickness) by 25%
hive4$edges$weight <- hive4$edges$weight/4
# And plot it (Figure 5)
plotHive(hive4, method = "abs", bkgnd = "white", axLabs = c("source", "hub", "sink"), axLab.pos = 1)
###############################################
# Now the same using adj2HPD() instead of edge2HPD()
# First, we'll create an adjacency matrix from our graph (gD)
gAdj <- get.adjacency(gD, type = "upper", edges = FALSE, names = TRUE, sparse = FALSE)
# Then we'll create the hive object for it
hive1 <- adj2HPD(gAdj, type = "2D")
# Assign nodes to a radius based on their degree (number of edges they are touching)
hive2 <- mineHPD(hive1, option = "rad <- tot.edge.count")
# Assign nodes to axes based on their position in the edge list
hive3 <- mod.mineHPD(hive2, option = "axis <- source.man.sink")
# In some cases (for undirected graphs), some nodes will not be assigned to any axes
# In those cases, use the function from "mod.mineHPD.R"
#source("mod.mineHPD.R")
#hive3 <- mod.mineHPD(hive2, option = "axis <- source.man.sink")
# Removing zero edges for better visualization
hive4 <- mineHPD(hive3, option = "remove zero edge")
# Node/edge customization is the same as above
#################################################
# Now lets expand the available options and add some new function(alitie)s
# Available in: "mod.mineHPD.R"
source("mod.mineHPD.R")
# Assign nodes to a radius based on the user specified values (in our case betweenness centrality)
hive2 <- mod.mineHPD(hive1, option = "rad <- userDefined", radData = data.frame(nds = V(gD)$name, bc = V(gD)$betweenness))
# Assign nodes to a radius randomly
hive2 <- mod.mineHPD(hive1, option = "rad <- random")
# Assign nodes to axes based on their degree
# Low degrees (1, 2, >2)
hive3 <- mod.mineHPD(hive2, option = "axis <- deg_one_two_more")
# Higer degrees (<=5, 6-10, >10)
hive3 <- mod.mineHPD(hive2, option = "axis <- deg_five_ten_more")
# Split axes - this function splits each of the 3 axes into 2 new axes (thus, resulting in 6 axes)
# and removes edge on the same axis (but it introduces new (duplicated) nodes)
hive4 <- mod.mineHPD(hive3, option = "axis <- split")
#################################################
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment