Skip to content

Instantly share code, notes, and snippets.

@adamsardar
Last active April 7, 2016 10:41
Show Gist options
  • Save adamsardar/3f8fc40ea3a493218fb002f0a22d23bd to your computer and use it in GitHub Desktop.
Save adamsardar/3f8fc40ea3a493218fb002f0a22d23bd to your computer and use it in GitHub Desktop.
A igraph writer for graphml that names nodes with accession fields
pacman::p_load(igraph,stringr,data.table,utils,ensurer,magrittr)
#' Write an igraph object to file in graphML format
#'
#' A frustrating feature of igraph is the lack of ability to affect the nodeID field of an output graphML file.
#' I ended up becoming so frustrated that I just wrote my own.
#'
#' @param graph2Write An igraph object containing a uniquely labelled 'name' vertex attribute (which is used as nodeID)
#' @param filename The filename to write out the graphml file to
#' @return filename The filename of the graphML file
writeETXcompatibelGraphML <- function(graph2Write,filename){
generateETXcompatibelGraphML(graph2Write,filename=filename)
return(filename)
}
#' Internal function for generating graphml
#'
#' My own implementation of a graphML writer. This is not a fancy XML file writer, it just prints out set lines to a file. A bit gross, but it does what it needs to.
#'
#' @param graphToWrite An igraph object containing a uniquely labelled 'name' vertex attribute (which is used as nodeID)
#' @param vertexAttributes A character vector of node properties to write out
#' @param edgeAttributes A character vector of edge properties to write out
#' @param filename The filename to write out the graphml file to (default is to print to console)
generateETXcompatibelGraphML <- function(graphToWrite,vertexAttributes = NULL,edgeAttributes = NULL, filename = ""){
graphToWrite %<>% ensure(is.igraph,
"name" %in% vertex_attr_names(.),
!any(duplicated(V(.)$name)),
err_desc = "An igraph network is required and containing the unique vertex key 'name'")
vertexInfoDT <- graphToWrite %>% get.data.frame(what="vertices") %>% data.table
setkey(vertexInfoDT,name)
edgeInfoDT <- graphToWrite %>% get.data.frame(what="edges") %>% data.table
if(is.null(vertexAttributes)){
vertexAttributes <- colnames(vertexInfoDT)
}else{
vertexAttributes %<>% ensure(all(. %in% colnames(vertexInfoDT)),
err_desc = "All desired vertex names must be vertex properties of the network that you wish to write")
}
if(is.null(edgeAttributes)){
edgeAttributes <- colnames(edgeInfoDT) %>% setdiff(c("from","to"))
}else{
edgeAttributes %<>% ensure(all(. %in% colnames(edgeInfoDT)),
err_desc = "All desired vertex names must be vertex properties of the network that you wish to write")
}
## Header stuff
cat('<?xml version="1.0" encoding="UTF-8"?>\n<graphml xmlns="http://graphml.graphdrawing.org/xmlns"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">\n',file = filename)
cat('<!-- Created By Gist Version Of writeETXcompatibelGraphML',file = filename,append=TRUE)
cat(' -->\n',file = filename,append=TRUE)
graphAttrs <- graph.attributes(graphToWrite)
for(graphAttributes in names(graphAttrs)){
cat(' <key id="',file = filename,append=TRUE)
cat(graphAttributes,file = filename,append=TRUE)
cat('" for="graph" attr.name="',file = filename,append=TRUE)
cat(graphAttributes,file = filename,append=TRUE)
cat('" attr.type="',file = filename,append=TRUE)
if(is.numeric(get.vertex.attribute(graphToWrite,graphAttributes))){
cat('double',file = filename,append=TRUE)
}else if(is.logical(get.vertex.attribute(graphToWrite,graphAttributes))){
cat('boolean',file = filename,append=TRUE)
}else{
cat('string',file = filename,append=TRUE)
}
cat('"/>\n',file = filename,append=TRUE)
}
for(nodeAttribute in vertexAttributes){
cat(' <key id="',file = filename,append=TRUE)
cat(nodeAttribute,file = filename,append=TRUE)
cat('" for="node" attr.name="',file = filename,append=TRUE)
cat(nodeAttribute,file = filename,append=TRUE)
cat('" attr.type="',file = filename,append=TRUE)
if(is.numeric(vertexInfoDT[,get(nodeAttribute)])){
cat('double',file = filename,append=TRUE)
}else if(is.logical(vertexInfoDT[,get(nodeAttribute)])){
cat('boolean',file = filename,append=TRUE)
}else{
cat('string',file = filename,append=TRUE)
}
cat('"/>\n',file = filename,append=TRUE)
}
#When it comes to writing out data, we want everything to be a character
vertexInfoDT %<>% .[,lapply(.SD,as.character)]
for(edgeAttribute in edgeAttributes){
cat(' <key id="',file = filename,append=TRUE)
cat(edgeAttribute,file = filename,append=TRUE)
cat('" for="edge" attr.name="',file = filename,append=TRUE)
cat(edgeAttribute,file = filename,append=TRUE)
cat('" attr.type="',file = filename,append=TRUE)
if(is.numeric(edgeInfoDT[,get(edgeAttribute)])){
cat('double',file = filename,append=TRUE)
}else if(is.logical(edgeInfoDT[,get(edgeAttribute)])){
cat('boolean',file = filename,append=TRUE)
}else{
cat('string',file = filename,append=TRUE)
}
cat('"/>\n',file = filename,append=TRUE)
}
edgeInfoDT %<>% .[,lapply(.SD,as.character)]
cat(' <graph id="G" edgedefault="',file = filename,append=TRUE)
if(is.directed(graphToWrite)){
cat('directed',file = filename,append=TRUE)
}else{
cat('undirected',file = filename,append=TRUE)
}
cat('">\n',file = filename,append=TRUE)
for(graphAttributes in names(graphAttrs)){
cat(' \t<data key="',file = filename,append=TRUE)
cat(graphAttributes,file = filename,append=TRUE)
cat('">',file = filename,append=TRUE)
cat(graphAttrs[[graphAttributes]],file = filename,append=TRUE)
cat('</data>\n',file = filename,append=TRUE)
}
###Body of graphml
#Nodes
vertexInfoOut <- function(vertexEntry){
cat(str_c('\t<node id="',vertexEntry,'">\n'),file = filename,append=TRUE)
return(numeric())
}
vertexPropertiesOut <- function(variable,value){
if(any(is.na(value))){value[which(is.na(value))] <- "NA"}
cat(str_c('\t <data key="',variable,'">',value,'</data>\n'),file = filename,append=TRUE)
return(numeric())
}
vertexClosureOut <- function(){
cat('\t</node>\n',file = filename,append=TRUE)
return(numeric())}
if(vcount(graphToWrite) > 0){
vertexInfoDT$.name <- "temp"
vertexInfoDT[,.name := name]
vertexInfoDT %>%
melt(id=".name",verbose=FALSE) %>%
.[,lapply(.SD,as.character)] %>%
.[,.(.SD[,vertexInfoOut(.name)],
.SD[,vertexPropertiesOut(variable,value)],
.SD[,vertexClosureOut()]),
by=.name]
vertexInfoDT[,.name := NULL]
}
#Edges
edgeInfoOut <- function(from,to){
cat(str_c('\t<edge source="',from,'" target="',to,'">\n'),file = filename,append=TRUE)
return(numeric())}
edgePropetiesOut <- function(variable,value){
if(any(is.na(value))){value[which(is.na(value))] <- "NA"}
cat(str_c('\t <data key="',variable,'">',value,'</data>\n'),file = filename,append=TRUE)
return(numeric())}
edgeClosureOut <- function(){
cat('\t</edge>\n',file = filename,append=TRUE)
return(numeric())}
if(ecount(graphToWrite) > 0){
if(length(edgeAttributes) > 0){
edgeInfoDT %>%
melt(id=c("from","to"),verbose=FALSE) %>%
.[,lapply(.SD,as.character)] %>%
.[,.(.SD[,edgeInfoOut(from,to)],
.SD[,edgePropetiesOut(variable,value)],
.SD[,edgeClosureOut()]),
by=list(from,to)]
}else{
edgeInfoDT %>%
.[,lapply(.SD,as.character)] %>%
.[,.(.SD[,edgeInfoOut(from,to)],
.SD[,edgeClosureOut()]),
by=list(from,to)]
}
}
#Tail section
cat(' </graph>\n',file = filename,append=TRUE)
cat('</graphml>\n',file = filename,append=TRUE)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment